From 5d23defa913fd80a15bf1ec74cb1deca2b845b5f Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 22 Feb 2022 23:37:19 +0100
Subject: [PATCH] Add C versions as fallback

---
 lapack-netlib/SRC/sgbtrs.c              |  684 ++++
 lapack-netlib/SRC/sgebak.c              |  674 ++++
 lapack-netlib/SRC/sgebal.c              |  838 +++++
 lapack-netlib/SRC/sgebd2.c              |  744 ++++
 lapack-netlib/SRC/sgebrd.c              |  783 ++++
 lapack-netlib/SRC/sgecon.c              |  656 ++++
 lapack-netlib/SRC/sgeequ.c              |  733 ++++
 lapack-netlib/SRC/sgeequb.c             |  753 ++++
 lapack-netlib/SRC/sgees.c               | 1001 +++++
 lapack-netlib/SRC/sgeesx.c              | 1120 ++++++
 lapack-netlib/SRC/sgeev.c               | 1029 ++++++
 lapack-netlib/SRC/sgeevx.c              | 1197 ++++++
 lapack-netlib/SRC/sgehd2.c              |  627 ++++
 lapack-netlib/SRC/sgehrd.c              |  782 ++++
 lapack-netlib/SRC/sgejsv.c              | 2678 ++++++++++++++
 lapack-netlib/SRC/sgelq.c               |  742 ++++
 lapack-netlib/SRC/sgelq2.c              |  597 +++
 lapack-netlib/SRC/sgelqf.c              |  698 ++++
 lapack-netlib/SRC/sgelqt.c              |  601 +++
 lapack-netlib/SRC/sgelqt3.c             |  657 ++++
 lapack-netlib/SRC/sgels.c               |  954 +++++
 lapack-netlib/SRC/sgelss.c              | 1308 +++++++
 lapack-netlib/SRC/sgelsy.c              |  939 +++++
 lapack-netlib/SRC/sgemlq.c              |  681 ++++
 lapack-netlib/SRC/sgemlqt.c             |  686 ++++
 lapack-netlib/SRC/sgemqr.c              |  683 ++++
 lapack-netlib/SRC/sgemqrt.c             |  706 ++++
 lapack-netlib/SRC/sgeql2.c              |  591 +++
 lapack-netlib/SRC/sgeqlf.c              |  709 ++++
 lapack-netlib/SRC/sgeqp3.c              |  789 ++++
 lapack-netlib/SRC/sgeqr.c               |  733 ++++
 lapack-netlib/SRC/sgeqr2.c              |  602 +++
 lapack-netlib/SRC/sgeqr2p.c             |  607 +++
 lapack-netlib/SRC/sgeqrf.c              |  700 ++++
 lapack-netlib/SRC/sgeqrfp.c             |  703 ++++
 lapack-netlib/SRC/sgeqrt.c              |  628 ++++
 lapack-netlib/SRC/sgeqrt2.c             |  645 ++++
 lapack-netlib/SRC/sgeqrt3.c             |  678 ++++
 lapack-netlib/SRC/sgerfs.c              |  879 +++++
 lapack-netlib/SRC/sgerfsx.c             | 1146 ++++++
 lapack-netlib/SRC/sgerq2.c              |  587 +++
 lapack-netlib/SRC/sgerqf.c              |  711 ++++
 lapack-netlib/SRC/sgesc2.c              |  604 +++
 lapack-netlib/SRC/sgesdd.c              | 2165 +++++++++++
 lapack-netlib/SRC/sgesv.c               |  576 +++
 lapack-netlib/SRC/sgesvd.c              | 4471 +++++++++++++++++++++++
 lapack-netlib/SRC/sgesvdq.c             | 2124 +++++++++++
 lapack-netlib/SRC/sgesvdx.c             | 1339 +++++++
 lapack-netlib/SRC/sgesvj.c              | 2210 +++++++++++
 lapack-netlib/SRC/sgesvx.c              | 1063 ++++++
 lapack-netlib/SRC/sgesvxx.c             | 1210 ++++++
 lapack-netlib/SRC/sgetc2.c              |  645 ++++
 lapack-netlib/SRC/sgetf2.c              |  618 ++++
 lapack-netlib/SRC/sgetrf.c              |  643 ++++
 lapack-netlib/SRC/sgetrf2.c             |  681 ++++
 lapack-netlib/SRC/sgetri.c              |  690 ++++
 lapack-netlib/SRC/sgetrs.c              |  619 ++++
 lapack-netlib/SRC/sgetsls.c             |  929 +++++
 lapack-netlib/SRC/sgetsqrhrt.c          |  765 ++++
 lapack-netlib/SRC/sggbak.c              |  720 ++++
 lapack-netlib/SRC/sggbal.c              | 1069 ++++++
 lapack-netlib/SRC/sgges.c               | 1162 ++++++
 lapack-netlib/SRC/sgges3.c              | 1163 ++++++
 lapack-netlib/SRC/sggesx.c              | 1308 +++++++
 lapack-netlib/SRC/sggev.c               | 1099 ++++++
 lapack-netlib/SRC/sggev3.c              | 1111 ++++++
 lapack-netlib/SRC/sggevx.c              | 1385 +++++++
 lapack-netlib/SRC/sggglm.c              |  787 ++++
 lapack-netlib/SRC/sgghd3.c              | 1452 ++++++++
 lapack-netlib/SRC/sgghrd.c              |  784 ++++
 lapack-netlib/SRC/sgglse.c              |  786 ++++
 lapack-netlib/SRC/sggqrf.c              |  718 ++++
 lapack-netlib/SRC/sggrqf.c              |  719 ++++
 lapack-netlib/SRC/sggsvd3.c             |  936 +++++
 lapack-netlib/SRC/sggsvp3.c             | 1056 ++++++
 lapack-netlib/SRC/sgsvj0.c              | 1592 ++++++++
 lapack-netlib/SRC/sgsvj1.c              | 1234 +++++++
 lapack-netlib/SRC/sgtcon.c              |  649 ++++
 lapack-netlib/SRC/sgtrfs.c              |  913 +++++
 lapack-netlib/SRC/sgtsv.c               |  745 ++++
 lapack-netlib/SRC/sgtsvx.c              |  828 +++++
 lapack-netlib/SRC/sgttrf.c              |  632 ++++
 lapack-netlib/SRC/sgttrs.c              |  632 ++++
 lapack-netlib/SRC/sgtts2.c              |  706 ++++
 lapack-netlib/SRC/shgeqz.c              | 1976 ++++++++++
 lapack-netlib/SRC/shsein.c              |  968 +++++
 lapack-netlib/SRC/shseqr.c              |  941 +++++
 lapack-netlib/SRC/sisnan.c              |  469 +++
 lapack-netlib/SRC/sla_gbamv.c           |  815 +++++
 lapack-netlib/SRC/sla_gbrcond.c         |  791 ++++
 lapack-netlib/SRC/sla_gbrfsx_extended.c | 1135 ++++++
 lapack-netlib/SRC/sla_gbrpvgrw.c        |  569 +++
 lapack-netlib/SRC/sla_geamv.c           |  779 ++++
 lapack-netlib/SRC/sla_gercond.c         |  735 ++++
 lapack-netlib/SRC/sla_gerfsx_extended.c | 1119 ++++++
 lapack-netlib/SRC/sla_gerpvgrw.c        |  542 +++
 96 files changed, 90636 insertions(+)
 create mode 100644 lapack-netlib/SRC/sgbtrs.c
 create mode 100644 lapack-netlib/SRC/sgebak.c
 create mode 100644 lapack-netlib/SRC/sgebal.c
 create mode 100644 lapack-netlib/SRC/sgebd2.c
 create mode 100644 lapack-netlib/SRC/sgebrd.c
 create mode 100644 lapack-netlib/SRC/sgecon.c
 create mode 100644 lapack-netlib/SRC/sgeequ.c
 create mode 100644 lapack-netlib/SRC/sgeequb.c
 create mode 100644 lapack-netlib/SRC/sgees.c
 create mode 100644 lapack-netlib/SRC/sgeesx.c
 create mode 100644 lapack-netlib/SRC/sgeev.c
 create mode 100644 lapack-netlib/SRC/sgeevx.c
 create mode 100644 lapack-netlib/SRC/sgehd2.c
 create mode 100644 lapack-netlib/SRC/sgehrd.c
 create mode 100644 lapack-netlib/SRC/sgejsv.c
 create mode 100644 lapack-netlib/SRC/sgelq.c
 create mode 100644 lapack-netlib/SRC/sgelq2.c
 create mode 100644 lapack-netlib/SRC/sgelqf.c
 create mode 100644 lapack-netlib/SRC/sgelqt.c
 create mode 100644 lapack-netlib/SRC/sgelqt3.c
 create mode 100644 lapack-netlib/SRC/sgels.c
 create mode 100644 lapack-netlib/SRC/sgelss.c
 create mode 100644 lapack-netlib/SRC/sgelsy.c
 create mode 100644 lapack-netlib/SRC/sgemlq.c
 create mode 100644 lapack-netlib/SRC/sgemlqt.c
 create mode 100644 lapack-netlib/SRC/sgemqr.c
 create mode 100644 lapack-netlib/SRC/sgemqrt.c
 create mode 100644 lapack-netlib/SRC/sgeql2.c
 create mode 100644 lapack-netlib/SRC/sgeqlf.c
 create mode 100644 lapack-netlib/SRC/sgeqp3.c
 create mode 100644 lapack-netlib/SRC/sgeqr.c
 create mode 100644 lapack-netlib/SRC/sgeqr2.c
 create mode 100644 lapack-netlib/SRC/sgeqr2p.c
 create mode 100644 lapack-netlib/SRC/sgeqrf.c
 create mode 100644 lapack-netlib/SRC/sgeqrfp.c
 create mode 100644 lapack-netlib/SRC/sgeqrt.c
 create mode 100644 lapack-netlib/SRC/sgeqrt2.c
 create mode 100644 lapack-netlib/SRC/sgeqrt3.c
 create mode 100644 lapack-netlib/SRC/sgerfs.c
 create mode 100644 lapack-netlib/SRC/sgerfsx.c
 create mode 100644 lapack-netlib/SRC/sgerq2.c
 create mode 100644 lapack-netlib/SRC/sgerqf.c
 create mode 100644 lapack-netlib/SRC/sgesc2.c
 create mode 100644 lapack-netlib/SRC/sgesdd.c
 create mode 100644 lapack-netlib/SRC/sgesv.c
 create mode 100644 lapack-netlib/SRC/sgesvd.c
 create mode 100644 lapack-netlib/SRC/sgesvdq.c
 create mode 100644 lapack-netlib/SRC/sgesvdx.c
 create mode 100644 lapack-netlib/SRC/sgesvj.c
 create mode 100644 lapack-netlib/SRC/sgesvx.c
 create mode 100644 lapack-netlib/SRC/sgesvxx.c
 create mode 100644 lapack-netlib/SRC/sgetc2.c
 create mode 100644 lapack-netlib/SRC/sgetf2.c
 create mode 100644 lapack-netlib/SRC/sgetrf.c
 create mode 100644 lapack-netlib/SRC/sgetrf2.c
 create mode 100644 lapack-netlib/SRC/sgetri.c
 create mode 100644 lapack-netlib/SRC/sgetrs.c
 create mode 100644 lapack-netlib/SRC/sgetsls.c
 create mode 100644 lapack-netlib/SRC/sgetsqrhrt.c
 create mode 100644 lapack-netlib/SRC/sggbak.c
 create mode 100644 lapack-netlib/SRC/sggbal.c
 create mode 100644 lapack-netlib/SRC/sgges.c
 create mode 100644 lapack-netlib/SRC/sgges3.c
 create mode 100644 lapack-netlib/SRC/sggesx.c
 create mode 100644 lapack-netlib/SRC/sggev.c
 create mode 100644 lapack-netlib/SRC/sggev3.c
 create mode 100644 lapack-netlib/SRC/sggevx.c
 create mode 100644 lapack-netlib/SRC/sggglm.c
 create mode 100644 lapack-netlib/SRC/sgghd3.c
 create mode 100644 lapack-netlib/SRC/sgghrd.c
 create mode 100644 lapack-netlib/SRC/sgglse.c
 create mode 100644 lapack-netlib/SRC/sggqrf.c
 create mode 100644 lapack-netlib/SRC/sggrqf.c
 create mode 100644 lapack-netlib/SRC/sggsvd3.c
 create mode 100644 lapack-netlib/SRC/sggsvp3.c
 create mode 100644 lapack-netlib/SRC/sgsvj0.c
 create mode 100644 lapack-netlib/SRC/sgsvj1.c
 create mode 100644 lapack-netlib/SRC/sgtcon.c
 create mode 100644 lapack-netlib/SRC/sgtrfs.c
 create mode 100644 lapack-netlib/SRC/sgtsv.c
 create mode 100644 lapack-netlib/SRC/sgtsvx.c
 create mode 100644 lapack-netlib/SRC/sgttrf.c
 create mode 100644 lapack-netlib/SRC/sgttrs.c
 create mode 100644 lapack-netlib/SRC/sgtts2.c
 create mode 100644 lapack-netlib/SRC/shgeqz.c
 create mode 100644 lapack-netlib/SRC/shsein.c
 create mode 100644 lapack-netlib/SRC/shseqr.c
 create mode 100644 lapack-netlib/SRC/sisnan.c
 create mode 100644 lapack-netlib/SRC/sla_gbamv.c
 create mode 100644 lapack-netlib/SRC/sla_gbrcond.c
 create mode 100644 lapack-netlib/SRC/sla_gbrfsx_extended.c
 create mode 100644 lapack-netlib/SRC/sla_gbrpvgrw.c
 create mode 100644 lapack-netlib/SRC/sla_geamv.c
 create mode 100644 lapack-netlib/SRC/sla_gercond.c
 create mode 100644 lapack-netlib/SRC/sla_gerfsx_extended.c
 create mode 100644 lapack-netlib/SRC/sla_gerpvgrw.c

diff --git a/lapack-netlib/SRC/sgbtrs.c b/lapack-netlib/SRC/sgbtrs.c
new file mode 100644
index 000000000..e67ecf8cf
--- /dev/null
+++ b/lapack-netlib/SRC/sgbtrs.c
@@ -0,0 +1,684 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static real c_b7 = -1.f;
+static integer c__1 = 1;
+static real c_b23 = 1.f;
+
+/* > \brief \b SGBTRS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGBTRS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgbtrs.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgbtrs.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgbtrs.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, */
+/*                          INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               AB( LDAB, * ), B( LDB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGBTRS solves a system of linear equations */
+/* >    A * X = B  or  A**T * X = B */
+/* > with a general band matrix A using the LU factorization computed */
+/* > by SGBTRF. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations. */
+/* >          = 'N':  A * X = B  (No transpose) */
+/* >          = 'T':  A**T* X = B  (Transpose) */
+/* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is REAL array, dimension (LDAB,N) */
+/* >          Details of the LU factorization of the band matrix A, as */
+/* >          computed by SGBTRF.  U is stored as an upper triangular band */
+/* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
+/* >          the multipliers used during the factorization are stored in */
+/* >          rows KL+KU+2 to 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= N, row i of the matrix was */
+/* >          interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the right hand side matrix B. */
+/* >          On exit, the solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgbtrs_(char *trans, integer *n, integer *kl, integer *
+	ku, integer *nrhs, real *ab, integer *ldab, integer *ipiv, real *b, 
+	integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
+	    integer *, real *, integer *, real *, integer *);
+    integer i__, j, l;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    logical lnoti;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *), stbsv_(char *, char *, char *, integer *, integer *, 
+	    real *, integer *, real *, integer *);
+    integer kd, lm;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical notran;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*nrhs < 0) {
+	*info = -5;
+    } else if (*ldab < (*kl << 1) + *ku + 1) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGBTRS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	return 0;
+    }
+
+    kd = *ku + *kl + 1;
+    lnoti = *kl > 0;
+
+    if (notran) {
+
+/*        Solve  A*X = B. */
+
+/*        Solve L*X = B, overwriting B with X. */
+
+/*        L is represented as a product of permutations and unit lower */
+/*        triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), */
+/*        where each transformation L(i) is a rank-one modification of */
+/*        the identity matrix. */
+
+	if (lnoti) {
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		i__2 = *kl, i__3 = *n - j;
+		lm = f2cmin(i__2,i__3);
+		l = ipiv[j];
+		if (l != j) {
+		    sswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb);
+		}
+		sger_(&lm, nrhs, &c_b7, &ab[kd + 1 + j * ab_dim1], &c__1, &b[
+			j + b_dim1], ldb, &b[j + 1 + b_dim1], ldb);
+/* L10: */
+	    }
+	}
+
+	i__1 = *nrhs;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Solve U*X = B, overwriting B with X. */
+
+	    i__2 = *kl + *ku;
+	    stbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[
+		    ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1);
+/* L20: */
+	}
+
+    } else {
+
+/*        Solve A**T*X = B. */
+
+	i__1 = *nrhs;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Solve U**T*X = B, overwriting B with X. */
+
+	    i__2 = *kl + *ku;
+	    stbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset],
+		     ldab, &b[i__ * b_dim1 + 1], &c__1);
+/* L30: */
+	}
+
+/*        Solve L**T*X = B, overwriting B with X. */
+
+	if (lnoti) {
+	    for (j = *n - 1; j >= 1; --j) {
+/* Computing MIN */
+		i__1 = *kl, i__2 = *n - j;
+		lm = f2cmin(i__1,i__2);
+		sgemv_("Transpose", &lm, nrhs, &c_b7, &b[j + 1 + b_dim1], ldb,
+			 &ab[kd + 1 + j * ab_dim1], &c__1, &c_b23, &b[j + 
+			b_dim1], ldb);
+		l = ipiv[j];
+		if (l != j) {
+		    sswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb);
+		}
+/* L40: */
+	    }
+	}
+    }
+    return 0;
+
+/*     End of SGBTRS */
+
+} /* sgbtrs_ */
+
diff --git a/lapack-netlib/SRC/sgebak.c b/lapack-netlib/SRC/sgebak.c
new file mode 100644
index 000000000..db6ec051c
--- /dev/null
+++ b/lapack-netlib/SRC/sgebak.c
@@ -0,0 +1,674 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEBAK */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEBAK + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebak.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebak.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebak.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, */
+/*                          INFO ) */
+
+/*       CHARACTER          JOB, SIDE */
+/*       INTEGER            IHI, ILO, INFO, LDV, M, N */
+/*       REAL               V( LDV, * ), SCALE( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEBAK forms the right or left eigenvectors of a real general matrix */
+/* > by backward transformation on the computed eigenvectors of the */
+/* > balanced matrix output by SGEBAL. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          Specifies the type of backward transformation required: */
+/* >          = 'N': do nothing, return immediately; */
+/* >          = 'P': do backward transformation for permutation only; */
+/* >          = 'S': do backward transformation for scaling only; */
+/* >          = 'B': do backward transformations for both permutation and */
+/* >                 scaling. */
+/* >          JOB must be the same as the argument JOB supplied to SGEBAL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'R':  V contains right eigenvectors; */
+/* >          = 'L':  V contains left eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of rows of the matrix V.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          The integers ILO and IHI determined by SGEBAL. */
+/* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SCALE */
+/* > \verbatim */
+/* >          SCALE is REAL array, dimension (N) */
+/* >          Details of the permutation and scaling factors, as returned */
+/* >          by SGEBAL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of columns of the matrix V.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,M) */
+/* >          On entry, the matrix of right or left eigenvectors to be */
+/* >          transformed, as returned by SHSEIN or STREVC. */
+/* >          On exit, V is overwritten by the transformed eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. LDV >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo, 
+	integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer 
+	*info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, i__1;
+
+    /* Local variables */
+    integer i__, k;
+    real s;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    logical leftv;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *);
+    integer ii;
+    extern /* Subroutine */ int xerbla_(char *, integer *,ftnlen);
+    logical rightv;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode and Test the input parameters */
+
+    /* Parameter adjustments */
+    --scale;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+
+    /* Function Body */
+    rightv = lsame_(side, "R");
+    leftv = lsame_(side, "L");
+
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (! rightv && ! leftv) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*ldv < f2cmax(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBAK", &i__1,(ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*m == 0) {
+	return 0;
+    }
+    if (lsame_(job, "N")) {
+	return 0;
+    }
+
+    if (*ilo == *ihi) {
+	goto L30;
+    }
+
+/*     Backward balance */
+
+    if (lsame_(job, "S") || lsame_(job, "B")) {
+
+	if (rightv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = scale[i__];
+		sscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L10: */
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = 1.f / scale[i__];
+		sscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L20: */
+	    }
+	}
+
+    }
+
+/*     Backward permutation */
+
+/*     For  I = ILO-1 step -1 until 1, */
+/*              IHI+1 step 1 until N do -- */
+
+L30:
+    if (lsame_(job, "P") || lsame_(job, "B")) {
+	if (rightv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L40;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = scale[i__];
+		if (k == i__) {
+		    goto L40;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L40:
+		;
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L50;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = scale[i__];
+		if (k == i__) {
+		    goto L50;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L50:
+		;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SGEBAK */
+
+} /* sgebak_ */
+
diff --git a/lapack-netlib/SRC/sgebal.c b/lapack-netlib/SRC/sgebal.c
new file mode 100644
index 000000000..672a0b985
--- /dev/null
+++ b/lapack-netlib/SRC/sgebal.c
@@ -0,0 +1,838 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGEBAL */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEBAL + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebal.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebal.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebal.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) */
+
+/*       CHARACTER          JOB */
+/*       INTEGER            IHI, ILO, INFO, LDA, N */
+/*       REAL               A( LDA, * ), SCALE( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEBAL balances a general real matrix A.  This involves, first, */
+/* > permuting A by a similarity transformation to isolate eigenvalues */
+/* > in the first 1 to ILO-1 and last IHI+1 to N elements on the */
+/* > diagonal; and second, applying a diagonal similarity transformation */
+/* > to rows and columns ILO to IHI to make the rows and columns as */
+/* > close in norm as possible.  Both steps are optional. */
+/* > */
+/* > Balancing may reduce the 1-norm of the matrix, and improve the */
+/* > accuracy of the computed eigenvalues and/or eigenvectors. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          Specifies the operations to be performed on A: */
+/* >          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */
+/* >                  for i = 1,...,N; */
+/* >          = 'P':  permute only; */
+/* >          = 'S':  scale only; */
+/* >          = 'B':  both permute and scale. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the input matrix A. */
+/* >          On exit,  A is overwritten by the balanced matrix. */
+/* >          If JOB = 'N', A is not referenced. */
+/* >          See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > \param[out] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          ILO and IHI are set to integers such that on exit */
+/* >          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. */
+/* >          If JOB = 'N' or 'S', ILO = 1 and IHI = N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SCALE */
+/* > \verbatim */
+/* >          SCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied to */
+/* >          A.  If P(j) is the index of the row and column interchanged */
+/* >          with row and column j and D(j) is the scaling factor */
+/* >          applied to row and column j, then */
+/* >          SCALE(j) = P(j)    for j = 1,...,ILO-1 */
+/* >                   = D(j)    for j = ILO,...,IHI */
+/* >                   = P(j)    for j = IHI+1,...,N. */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The permutations consist of row and column interchanges which put */
+/* >  the matrix in the form */
+/* > */
+/* >             ( T1   X   Y  ) */
+/* >     P A P = (  0   B   Z  ) */
+/* >             (  0   0   T2 ) */
+/* > */
+/* >  where T1 and T2 are upper triangular matrices whose eigenvalues lie */
+/* >  along the diagonal.  The column indices ILO and IHI mark the starting */
+/* >  and ending columns of the submatrix B. Balancing consists of applying */
+/* >  a diagonal similarity transformation inv(D) * B * D to make the */
+/* >  1-norms of each row of B and its corresponding column nearly equal. */
+/* >  The output matrix is */
+/* > */
+/* >     ( T1     X*D          Y    ) */
+/* >     (  0  inv(D)*B*D  inv(D)*Z ). */
+/* >     (  0      0           T2   ) */
+/* > */
+/* >  Information about the permutations P and the diagonal matrix D is */
+/* >  returned in the vector SCALE. */
+/* > */
+/* >  This subroutine is based on the EISPACK routine BALANC. */
+/* > */
+/* >  Modified by Tzu-Yi Chen, Computer Science Division, University of */
+/* >    California at Berkeley, USA */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, 
+	integer *ilo, integer *ihi, real *scale, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    integer iexc;
+    extern real snrm2_(integer *, real *, integer *);
+    real c__, f, g;
+    integer i__, j, k, l, m;
+    real r__, s;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
+	    sswap_(integer *, real *, integer *, real *, integer *);
+    real sfmin1, sfmin2, sfmax1, sfmax2, ca, ra;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *,ftnlen);
+    extern integer isamax_(integer *, real *, integer *);
+    extern logical sisnan_(real *);
+    logical noconv;
+    integer ica, ira;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --scale;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBAL", &i__1,(ftnlen)6);
+	return 0;
+    }
+
+    k = 1;
+    l = *n;
+
+    if (*n == 0) {
+	goto L210;
+    }
+
+    if (lsame_(job, "N")) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scale[i__] = 1.f;
+/* L10: */
+	}
+	goto L210;
+    }
+
+    if (lsame_(job, "S")) {
+	goto L120;
+    }
+
+/*     Permutation to isolate eigenvalues if possible */
+
+    goto L50;
+
+/*     Row and column exchange. */
+
+L20:
+    scale[m] = (real) j;
+    if (j == m) {
+	goto L30;
+    }
+
+    sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
+    i__1 = *n - k + 1;
+    sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);
+
+L30:
+    switch (iexc) {
+	case 1:  goto L40;
+	case 2:  goto L80;
+    }
+
+/*     Search for rows isolating an eigenvalue and push them down. */
+
+L40:
+    if (l == 1) {
+	goto L210;
+    }
+    --l;
+
+L50:
+    for (j = l; j >= 1; --j) {
+
+	i__1 = l;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (i__ == j) {
+		goto L60;
+	    }
+	    if (a[j + i__ * a_dim1] != 0.f) {
+		goto L70;
+	    }
+L60:
+	    ;
+	}
+
+	m = l;
+	iexc = 1;
+	goto L20;
+L70:
+	;
+    }
+
+    goto L90;
+
+/*     Search for columns isolating an eigenvalue and push them left. */
+
+L80:
+    ++k;
+
+L90:
+    i__1 = l;
+    for (j = k; j <= i__1; ++j) {
+
+	i__2 = l;
+	for (i__ = k; i__ <= i__2; ++i__) {
+	    if (i__ == j) {
+		goto L100;
+	    }
+	    if (a[i__ + j * a_dim1] != 0.f) {
+		goto L110;
+	    }
+L100:
+	    ;
+	}
+
+	m = k;
+	iexc = 2;
+	goto L20;
+L110:
+	;
+    }
+
+L120:
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	scale[i__] = 1.f;
+/* L130: */
+    }
+
+    if (lsame_(job, "P")) {
+	goto L210;
+    }
+
+/*     Balance the submatrix in rows K to L. */
+
+/*     Iterative loop for norm reduction */
+
+    sfmin1 = slamch_("S") / slamch_("P");
+    sfmax1 = 1.f / sfmin1;
+    sfmin2 = sfmin1 * 2.f;
+    sfmax2 = 1.f / sfmin2;
+L140:
+    noconv = FALSE_;
+
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+
+	i__2 = l - k + 1;
+	c__ = snrm2_(&i__2, &a[k + i__ * a_dim1], &c__1);
+	i__2 = l - k + 1;
+	r__ = snrm2_(&i__2, &a[i__ + k * a_dim1], lda);
+	ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
+	ca = (r__1 = a[ica + i__ * a_dim1], abs(r__1));
+	i__2 = *n - k + 1;
+	ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda);
+	ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], abs(r__1));
+
+/*        Guard against zero C or R due to underflow. */
+
+	if (c__ == 0.f || r__ == 0.f) {
+	    goto L200;
+	}
+	g = r__ / 2.f;
+	f = 1.f;
+	s = c__ + r__;
+L160:
+/* Computing MAX */
+	r__1 = f2cmax(f,c__);
+/* Computing MIN */
+	r__2 = f2cmin(r__,g);
+	if (c__ >= g || f2cmax(r__1,ca) >= sfmax2 || f2cmin(r__2,ra) <= sfmin2) {
+	    goto L170;
+	}
+	f *= 2.f;
+	c__ *= 2.f;
+	ca *= 2.f;
+	r__ /= 2.f;
+	g /= 2.f;
+	ra /= 2.f;
+	goto L160;
+
+L170:
+	g = c__ / 2.f;
+L180:
+/* Computing MIN */
+	r__1 = f2cmin(f,c__), r__1 = f2cmin(r__1,g);
+	if (g < r__ || f2cmax(r__,ra) >= sfmax2 || f2cmin(r__1,ca) <= sfmin2) {
+	    goto L190;
+	}
+	r__1 = c__ + f + ca + r__ + g + ra;
+	if (sisnan_(&r__1)) {
+
+/*           Exit if NaN to avoid infinite loop */
+
+	    *info = -3;
+	    i__2 = -(*info);
+	    xerbla_("SGEBAL", &i__2, (ftnlen)6);
+	    return 0;
+	}
+	f /= 2.f;
+	c__ /= 2.f;
+	g /= 2.f;
+	ca /= 2.f;
+	r__ *= 2.f;
+	ra *= 2.f;
+	goto L180;
+
+/*        Now balance. */
+
+L190:
+	if (c__ + r__ >= s * .95f) {
+	    goto L200;
+	}
+	if (f < 1.f && scale[i__] < 1.f) {
+	    if (f * scale[i__] <= sfmin1) {
+		goto L200;
+	    }
+	}
+	if (f > 1.f && scale[i__] > 1.f) {
+	    if (scale[i__] >= sfmax1 / f) {
+		goto L200;
+	    }
+	}
+	g = 1.f / f;
+	scale[i__] *= f;
+	noconv = TRUE_;
+
+	i__2 = *n - k + 1;
+	sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
+	sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);
+
+L200:
+	;
+    }
+
+    if (noconv) {
+	goto L140;
+    }
+
+L210:
+    *ilo = k;
+    *ihi = l;
+
+    return 0;
+
+/*     End of SGEBAL */
+
+} /* sgebal_ */
+
diff --git a/lapack-netlib/SRC/sgebd2.c b/lapack-netlib/SRC/sgebd2.c
new file mode 100644
index 000000000..0e774257e
--- /dev/null
+++ b/lapack-netlib/SRC/sgebd2.c
@@ -0,0 +1,744 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEBD2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebd2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebd2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebd2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               A( LDA, * ), D( * ), E( * ), TAUP( * ), */
+/*      $                   TAUQ( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEBD2 reduces a real general m by n matrix A to upper or lower */
+/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
+/* > */
+/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows in the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns in the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the m by n general matrix to be reduced. */
+/* >          On exit, */
+/* >          if m >= n, the diagonal and the first superdiagonal are */
+/* >            overwritten with the upper bidiagonal matrix B; the */
+/* >            elements below the diagonal, with the array TAUQ, represent */
+/* >            the orthogonal matrix Q as a product of elementary */
+/* >            reflectors, and the elements above the first superdiagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors; */
+/* >          if m < n, the diagonal and the first subdiagonal are */
+/* >            overwritten with the lower bidiagonal matrix B; the */
+/* >            elements below the first subdiagonal, with the array TAUQ, */
+/* >            represent the orthogonal matrix Q as a product of */
+/* >            elementary reflectors, and the elements above the diagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors. */
+/* >          See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (f2cmin(M,N)) */
+/* >          The diagonal elements of the bidiagonal matrix B: */
+/* >          D(i) = A(i,i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] E */
+/* > \verbatim */
+/* >          E is REAL array, dimension (f2cmin(M,N)-1) */
+/* >          The off-diagonal elements of the bidiagonal matrix B: */
+/* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
+/* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUQ */
+/* > \verbatim */
+/* >          TAUQ is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Q. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUP */
+/* > \verbatim */
+/* >          TAUP is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix P. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (f2cmax(M,N)) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit. */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrices Q and P are represented as products of elementary */
+/* >  reflectors: */
+/* > */
+/* >  If m >= n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
+/* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  If m < n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
+/* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  The contents of A on exit are illustrated by the following examples: */
+/* > */
+/* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
+/* > */
+/* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
+/* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
+/* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
+/* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
+/* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
+/* >    (  v1  v2  v3  v4  v5 ) */
+/* > */
+/* >  where d and e denote diagonal and off-diagonal elements of B, vi */
+/* >  denotes an element of the vector defining H(i), and ui an element of */
+/* >  the vector defining G(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, 
+	real *d__, real *e, real *tauq, real *taup, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
+	    integer *, real *);
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBD2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * 
+		    a_dim1], &c__1, &tauq[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    if (i__ < *n) {
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__;
+		slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &
+			tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]
+			);
+	    }
+	    a[i__ + i__ * a_dim1] = d__[i__];
+
+	    if (i__ < *n) {
+
+/*              Generate elementary reflector G(i) to annihilate */
+/*              A(i,i+2:n) */
+
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + f2cmin(
+			i__3,*n) * a_dim1], lda, &taup[i__]);
+		e[i__] = a[i__ + (i__ + 1) * a_dim1];
+		a[i__ + (i__ + 1) * a_dim1] = 1.f;
+
+/*              Apply G(i) to A(i+1:m,i+1:n) from the right */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], 
+			lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], 
+			lda, &work[1]);
+		a[i__ + (i__ + 1) * a_dim1] = e[i__];
+	    } else {
+		taup[i__] = 0.f;
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector G(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * 
+		    a_dim1], lda, &taup[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+
+/*           Apply G(i) to A(i+1:m,i:n) from the right */
+
+	    if (i__ < *m) {
+		i__2 = *m - i__;
+		i__3 = *n - i__ + 1;
+		slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &
+			taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    }
+	    a[i__ + i__ * a_dim1] = d__[i__];
+
+	    if (i__ < *m) {
+
+/*              Generate elementary reflector H(i) to annihilate */
+/*              A(i+2:m,i) */
+
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*m) + 
+			i__ * a_dim1], &c__1, &tauq[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.f;
+
+/*              Apply H(i) to A(i+1:m,i+1:n) from the left */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], 
+			lda, &work[1]);
+		a[i__ + 1 + i__ * a_dim1] = e[i__];
+	    } else {
+		tauq[i__] = 0.f;
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of SGEBD2 */
+
+} /* sgebd2_ */
+
diff --git a/lapack-netlib/SRC/sgebrd.c b/lapack-netlib/SRC/sgebrd.c
new file mode 100644
index 000000000..8c89c5136
--- /dev/null
+++ b/lapack-netlib/SRC/sgebrd.c
@@ -0,0 +1,783 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+static real c_b21 = -1.f;
+static real c_b22 = 1.f;
+
+/* > \brief \b SGEBRD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEBRD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgebrd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgebrd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgebrd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, */
+/*                          INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       REAL               A( LDA, * ), D( * ), E( * ), TAUP( * ), */
+/*      $                   TAUQ( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEBRD reduces a general real M-by-N matrix A to upper or lower */
+/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
+/* > */
+/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows in the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns in the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N general matrix to be reduced. */
+/* >          On exit, */
+/* >          if m >= n, the diagonal and the first superdiagonal are */
+/* >            overwritten with the upper bidiagonal matrix B; the */
+/* >            elements below the diagonal, with the array TAUQ, represent */
+/* >            the orthogonal matrix Q as a product of elementary */
+/* >            reflectors, and the elements above the first superdiagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors; */
+/* >          if m < n, the diagonal and the first subdiagonal are */
+/* >            overwritten with the lower bidiagonal matrix B; the */
+/* >            elements below the first subdiagonal, with the array TAUQ, */
+/* >            represent the orthogonal matrix Q as a product of */
+/* >            elementary reflectors, and the elements above the diagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors. */
+/* >          See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (f2cmin(M,N)) */
+/* >          The diagonal elements of the bidiagonal matrix B: */
+/* >          D(i) = A(i,i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] E */
+/* > \verbatim */
+/* >          E is REAL array, dimension (f2cmin(M,N)-1) */
+/* >          The off-diagonal elements of the bidiagonal matrix B: */
+/* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
+/* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUQ */
+/* > \verbatim */
+/* >          TAUQ is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Q. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUP */
+/* > \verbatim */
+/* >          TAUP is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix P. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The length of the array WORK.  LWORK >= f2cmax(1,M,N). */
+/* >          For optimum performance LWORK >= (M+N)*NB, where NB */
+/* >          is the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrices Q and P are represented as products of elementary */
+/* >  reflectors: */
+/* > */
+/* >  If m >= n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
+/* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  If m < n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
+/* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  The contents of A on exit are illustrated by the following examples: */
+/* > */
+/* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
+/* > */
+/* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
+/* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
+/* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
+/* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
+/* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
+/* >    (  v1  v2  v3  v4  v5 ) */
+/* > */
+/* >  where d and e denote diagonal and off-diagonal elements of B, vi */
+/* >  denotes an element of the vector defining H(i), and ui an element of */
+/* >  the vector defining G(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, 
+	real *d__, real *e, real *tauq, real *taup, real *work, integer *
+	lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, j, nbmin, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *);
+    integer minmn;
+    extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, integer *);
+    integer nb, nx;
+    extern /* Subroutine */ int slabrd_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, real *, real *, real *, integer *, 
+	    real *, integer *);
+    integer ws;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwrkx, ldwrky, lwkopt;
+    logical lquery;
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = f2cmax(i__1,i__2);
+    lwkopt = (*m + *n) * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m);
+	if (*lwork < f2cmax(i__1,*n) && ! lquery) {
+	    *info = -10;
+	}
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBRD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    minmn = f2cmin(*m,*n);
+    if (minmn == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    ws = f2cmax(*m,*n);
+    ldwrkx = *m;
+    ldwrky = *n;
+
+    if (nb > 1 && nb < minmn) {
+
+/*        Set the crossover point NX. */
+
+/* Computing MAX */
+	i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+
+/*        Determine when to switch from blocked to unblocked code. */
+
+	if (nx < minmn) {
+	    ws = (*m + *n) * nb;
+	    if (*lwork < ws) {
+
+/*              Not enough work space for the optimal NB, consider using */
+/*              a smaller block size. */
+
+		nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		if (*lwork >= (*m + *n) * nbmin) {
+		    nb = *lwork / (*m + *n);
+		} else {
+		    nb = 1;
+		    nx = minmn;
+		}
+	    }
+	}
+    } else {
+	nx = minmn;
+    }
+
+    i__1 = minmn - nx;
+    i__2 = nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+
+/*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return */
+/*        the matrices X and Y which are needed to update the unreduced */
+/*        part of the matrix */
+
+	i__3 = *m - i__ + 1;
+	i__4 = *n - i__ + 1;
+	slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
+		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx 
+		* nb + 1], &ldwrky);
+
+/*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */
+/*        of the form  A := A - V*Y**T - X*U**T */
+
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ 
+		+ nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], &
+		ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, &
+		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
+		c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*        Copy diagonal and off-diagonal elements of B back into A */
+
+	if (*m >= *n) {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + (j + 1) * a_dim1] = e[j];
+/* L10: */
+	    }
+	} else {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + 1 + j * a_dim1] = e[j];
+/* L20: */
+	    }
+	}
+/* L30: */
+    }
+
+/*     Use unblocked code to reduce the remainder of the matrix */
+
+    i__2 = *m - i__ + 1;
+    i__1 = *n - i__ + 1;
+    sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
+	    tauq[i__], &taup[i__], &work[1], &iinfo);
+    work[1] = (real) ws;
+    return 0;
+
+/*     End of SGEBRD */
+
+} /* sgebrd_ */
+
diff --git a/lapack-netlib/SRC/sgecon.c b/lapack-netlib/SRC/sgecon.c
new file mode 100644
index 000000000..8d5356683
--- /dev/null
+++ b/lapack-netlib/SRC/sgecon.c
@@ -0,0 +1,656 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGECON */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGECON + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgecon.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgecon.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgecon.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, */
+/*                          INFO ) */
+
+/*       CHARACTER          NORM */
+/*       INTEGER            INFO, LDA, N */
+/*       REAL               ANORM, RCOND */
+/*       INTEGER            IWORK( * ) */
+/*       REAL               A( LDA, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGECON estimates the reciprocal of the condition number of a general */
+/* > real matrix A, in either the 1-norm or the infinity-norm, using */
+/* > the LU factorization computed by SGETRF. */
+/* > */
+/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
+/* > condition number is computed as */
+/* >    RCOND = 1 / ( norm(A) * norm(inv(A)) ). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] NORM */
+/* > \verbatim */
+/* >          NORM is CHARACTER*1 */
+/* >          Specifies whether the 1-norm condition number or the */
+/* >          infinity-norm condition number is required: */
+/* >          = '1' or 'O':  1-norm; */
+/* >          = 'I':         Infinity-norm. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          The factors L and U from the factorization A = P*L*U */
+/* >          as computed by SGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ANORM */
+/* > \verbatim */
+/* >          ANORM is REAL */
+/* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
+/* >          If NORM = 'I', the infinity-norm of the original matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >          The reciprocal of the condition number of the matrix A, */
+/* >          computed as RCOND = 1/(norm(A) * norm(inv(A))). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgecon_(char *norm, integer *n, real *a, integer *lda, 
+	real *anorm, real *rcond, real *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1;
+    real r__1;
+
+    /* Local variables */
+    integer kase, kase1;
+    real scale;
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *), 
+	    slacn2_(integer *, real *, real *, integer *, real *, integer *, 
+	    integer *);
+    real sl;
+    integer ix;
+    extern real slamch_(char *);
+    real su;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer isamax_(integer *, real *, integer *);
+    real ainvnm;
+    logical onenrm;
+    char normin[1];
+    extern /* Subroutine */ int slatrs_(char *, char *, char *, char *, 
+	    integer *, real *, integer *, real *, real *, real *, integer *);
+    real smlnum;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
+    if (! onenrm && ! lsame_(norm, "I")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*anorm < 0.f) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGECON", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    *rcond = 0.f;
+    if (*n == 0) {
+	*rcond = 1.f;
+	return 0;
+    } else if (*anorm == 0.f) {
+	return 0;
+    }
+
+    smlnum = slamch_("Safe minimum");
+
+/*     Estimate the norm of inv(A). */
+
+    ainvnm = 0.f;
+    *(unsigned char *)normin = 'N';
+    if (onenrm) {
+	kase1 = 1;
+    } else {
+	kase1 = 2;
+    }
+    kase = 0;
+L10:
+    slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
+    if (kase != 0) {
+	if (kase == kase1) {
+
+/*           Multiply by inv(L). */
+
+	    slatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], 
+		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);
+
+/*           Multiply by inv(U). */
+
+	    slatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[
+		    a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info);
+	} else {
+
+/*           Multiply by inv(U**T). */
+
+	    slatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset],
+		     lda, &work[1], &su, &work[*n * 3 + 1], info);
+
+/*           Multiply by inv(L**T). */
+
+	    slatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], 
+		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);
+	}
+
+/*        Divide X by 1/(SL*SU) if doing so will not cause overflow. */
+
+	scale = sl * su;
+	*(unsigned char *)normin = 'Y';
+	if (scale != 1.f) {
+	    ix = isamax_(n, &work[1], &c__1);
+	    if (scale < (r__1 = work[ix], abs(r__1)) * smlnum || scale == 0.f)
+		     {
+		goto L20;
+	    }
+	    srscl_(n, &scale, &work[1], &c__1);
+	}
+	goto L10;
+    }
+
+/*     Compute the estimate of the reciprocal condition number. */
+
+    if (ainvnm != 0.f) {
+	*rcond = 1.f / ainvnm / *anorm;
+    }
+
+L20:
+    return 0;
+
+/*     End of SGECON */
+
+} /* sgecon_ */
+
diff --git a/lapack-netlib/SRC/sgeequ.c b/lapack-netlib/SRC/sgeequ.c
new file mode 100644
index 000000000..165450d55
--- /dev/null
+++ b/lapack-netlib/SRC/sgeequ.c
@@ -0,0 +1,733 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEEQU */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEEQU + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeequ.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeequ.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeequ.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
+/*                          INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               AMAX, COLCND, ROWCND */
+/*       REAL               A( LDA, * ), C( * ), R( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEEQU computes row and column scalings intended to equilibrate an */
+/* > M-by-N matrix A and reduce its condition number.  R returns the row */
+/* > scale factors and C the column scale factors, chosen to try to make */
+/* > the largest element in each row and column of the matrix B with */
+/* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */
+/* > */
+/* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */
+/* > number and BIGNUM = largest safe number.  Use of these scaling */
+/* > factors is not guaranteed to reduce the condition number of A but */
+/* > works well in practice. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          The M-by-N matrix whose equilibration factors are */
+/* >          to be computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] R */
+/* > \verbatim */
+/* >          R is REAL array, dimension (M) */
+/* >          If INFO = 0 or INFO > M, R contains the row scale factors */
+/* >          for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >          If INFO = 0,  C contains the column scale factors for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ROWCND */
+/* > \verbatim */
+/* >          ROWCND is REAL */
+/* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
+/* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
+/* >          AMAX is neither too large nor too small, it is not worth */
+/* >          scaling by R. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] COLCND */
+/* > \verbatim */
+/* >          COLCND is REAL */
+/* >          If INFO = 0, COLCND contains the ratio of the smallest */
+/* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
+/* >          worth scaling by C. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] AMAX */
+/* > \verbatim */
+/* >          AMAX is REAL */
+/* >          Absolute value of largest matrix element.  If AMAX is very */
+/* >          close to overflow or very close to underflow, the matrix */
+/* >          should be scaled. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i,  and i is */
+/* >                <= M:  the i-th row of A is exactly zero */
+/* >                >  M:  the (i-M)-th column of A is exactly zero */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgeequ_(integer *m, integer *n, real *a, integer *lda, 
+	real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer 
+	*info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1, r__2, r__3;
+
+    /* Local variables */
+    integer i__, j;
+    real rcmin, rcmax;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real bignum, smlnum;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --r__;
+    --c__;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEEQU", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	*rowcnd = 1.f;
+	*colcnd = 1.f;
+	*amax = 0.f;
+	return 0;
+    }
+
+/*     Get machine constants. */
+
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+
+/*     Compute row scale factors. */
+
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__[i__] = 0.f;
+/* L10: */
+    }
+
+/*     Find the maximum element in each row. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = r__[i__], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1));
+	    r__[i__] = f2cmax(r__2,r__3);
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*     Find the maximum and minimum scale factors. */
+
+    rcmin = bignum;
+    rcmax = 0.f;
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	r__1 = rcmax, r__2 = r__[i__];
+	rcmax = f2cmax(r__1,r__2);
+/* Computing MIN */
+	r__1 = rcmin, r__2 = r__[i__];
+	rcmin = f2cmin(r__1,r__2);
+/* L40: */
+    }
+    *amax = rcmax;
+
+    if (rcmin == 0.f) {
+
+/*        Find the first zero scale factor and return an error code. */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (r__[i__] == 0.f) {
+		*info = i__;
+		return 0;
+	    }
+/* L50: */
+	}
+    } else {
+
+/*        Invert the scale factors. */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MIN */
+/* Computing MAX */
+	    r__2 = r__[i__];
+	    r__1 = f2cmax(r__2,smlnum);
+	    r__[i__] = 1.f / f2cmin(r__1,bignum);
+/* L60: */
+	}
+
+/*        Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)) */
+
+	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+/*     Compute column scale factors */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	c__[j] = 0.f;
+/* L70: */
+    }
+
+/*     Find the maximum element in each column, */
+/*     assuming the row scaling computed above. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = c__[j], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1)) * 
+		    r__[i__];
+	    c__[j] = f2cmax(r__2,r__3);
+/* L80: */
+	}
+/* L90: */
+    }
+
+/*     Find the maximum and minimum scale factors. */
+
+    rcmin = bignum;
+    rcmax = 0.f;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	r__1 = rcmin, r__2 = c__[j];
+	rcmin = f2cmin(r__1,r__2);
+/* Computing MAX */
+	r__1 = rcmax, r__2 = c__[j];
+	rcmax = f2cmax(r__1,r__2);
+/* L100: */
+    }
+
+    if (rcmin == 0.f) {
+
+/*        Find the first zero scale factor and return an error code. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (c__[j] == 0.f) {
+		*info = *m + j;
+		return 0;
+	    }
+/* L110: */
+	}
+    } else {
+
+/*        Invert the scale factors. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+/* Computing MAX */
+	    r__2 = c__[j];
+	    r__1 = f2cmax(r__2,smlnum);
+	    c__[j] = 1.f / f2cmin(r__1,bignum);
+/* L120: */
+	}
+
+/*        Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)) */
+
+	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+    return 0;
+
+/*     End of SGEEQU */
+
+} /* sgeequ_ */
+
diff --git a/lapack-netlib/SRC/sgeequb.c b/lapack-netlib/SRC/sgeequb.c
new file mode 100644
index 000000000..fb6055bbf
--- /dev/null
+++ b/lapack-netlib/SRC/sgeequb.c
@@ -0,0 +1,753 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEEQUB */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEEQUB + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeequb
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeequb
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeequb
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
+/*                           INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               AMAX, COLCND, ROWCND */
+/*       REAL               A( LDA, * ), C( * ), R( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEEQUB computes row and column scalings intended to equilibrate an */
+/* > M-by-N matrix A and reduce its condition number.  R returns the row */
+/* > scale factors and C the column scale factors, chosen to try to make */
+/* > the largest element in each row and column of the matrix B with */
+/* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */
+/* > the radix. */
+/* > */
+/* > R(i) and C(j) are restricted to be a power of the radix between */
+/* > SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use */
+/* > of these scaling factors is not guaranteed to reduce the condition */
+/* > number of A but works well in practice. */
+/* > */
+/* > This routine differs from SGEEQU by restricting the scaling factors */
+/* > to a power of the radix.  Barring over- and underflow, scaling by */
+/* > these factors introduces no additional rounding errors.  However, the */
+/* > scaled entries' magnitudes are no longer approximately 1 but lie */
+/* > between sqrt(radix) and 1/sqrt(radix). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          The M-by-N matrix whose equilibration factors are */
+/* >          to be computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] R */
+/* > \verbatim */
+/* >          R is REAL array, dimension (M) */
+/* >          If INFO = 0 or INFO > M, R contains the row scale factors */
+/* >          for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >          If INFO = 0,  C contains the column scale factors for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ROWCND */
+/* > \verbatim */
+/* >          ROWCND is REAL */
+/* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
+/* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
+/* >          AMAX is neither too large nor too small, it is not worth */
+/* >          scaling by R. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] COLCND */
+/* > \verbatim */
+/* >          COLCND is REAL */
+/* >          If INFO = 0, COLCND contains the ratio of the smallest */
+/* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
+/* >          worth scaling by C. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] AMAX */
+/* > \verbatim */
+/* >          AMAX is REAL */
+/* >          Absolute value of largest matrix element.  If AMAX is very */
+/* >          close to overflow or very close to underflow, the matrix */
+/* >          should be scaled. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i,  and i is */
+/* >                <= M:  the i-th row of A is exactly zero */
+/* >                >  M:  the (i-M)-th column of A is exactly zero */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgeequb_(integer *m, integer *n, real *a, integer *lda, 
+	real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer 
+	*info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1, r__2, r__3;
+
+    /* Local variables */
+    integer i__, j;
+    real radix, rcmin, rcmax;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real bignum, logrdx, smlnum;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --r__;
+    --c__;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEEQUB", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*m == 0 || *n == 0) {
+	*rowcnd = 1.f;
+	*colcnd = 1.f;
+	*amax = 0.f;
+	return 0;
+    }
+
+/*     Get machine constants.  Assume SMLNUM is a power of the radix. */
+
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    radix = slamch_("B");
+    logrdx = log(radix);
+
+/*     Compute row scale factors. */
+
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__[i__] = 0.f;
+/* L10: */
+    }
+
+/*     Find the maximum element in each row. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = r__[i__], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1));
+	    r__[i__] = f2cmax(r__2,r__3);
+/* L20: */
+	}
+/* L30: */
+    }
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (r__[i__] > 0.f) {
+	    i__2 = (integer) (log(r__[i__]) / logrdx);
+	    r__[i__] = pow_ri(&radix, &i__2);
+	}
+    }
+
+/*     Find the maximum and minimum scale factors. */
+
+    rcmin = bignum;
+    rcmax = 0.f;
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	r__1 = rcmax, r__2 = r__[i__];
+	rcmax = f2cmax(r__1,r__2);
+/* Computing MIN */
+	r__1 = rcmin, r__2 = r__[i__];
+	rcmin = f2cmin(r__1,r__2);
+/* L40: */
+    }
+    *amax = rcmax;
+
+    if (rcmin == 0.f) {
+
+/*        Find the first zero scale factor and return an error code. */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (r__[i__] == 0.f) {
+		*info = i__;
+		return 0;
+	    }
+/* L50: */
+	}
+    } else {
+
+/*        Invert the scale factors. */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MIN */
+/* Computing MAX */
+	    r__2 = r__[i__];
+	    r__1 = f2cmax(r__2,smlnum);
+	    r__[i__] = 1.f / f2cmin(r__1,bignum);
+/* L60: */
+	}
+
+/*        Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)). */
+
+	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+/*     Compute column scale factors */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	c__[j] = 0.f;
+/* L70: */
+    }
+
+/*     Find the maximum element in each column, */
+/*     assuming the row scaling computed above. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = c__[j], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1)) * 
+		    r__[i__];
+	    c__[j] = f2cmax(r__2,r__3);
+/* L80: */
+	}
+	if (c__[j] > 0.f) {
+	    i__2 = (integer) (log(c__[j]) / logrdx);
+	    c__[j] = pow_ri(&radix, &i__2);
+	}
+/* L90: */
+    }
+
+/*     Find the maximum and minimum scale factors. */
+
+    rcmin = bignum;
+    rcmax = 0.f;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	r__1 = rcmin, r__2 = c__[j];
+	rcmin = f2cmin(r__1,r__2);
+/* Computing MAX */
+	r__1 = rcmax, r__2 = c__[j];
+	rcmax = f2cmax(r__1,r__2);
+/* L100: */
+    }
+
+    if (rcmin == 0.f) {
+
+/*        Find the first zero scale factor and return an error code. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (c__[j] == 0.f) {
+		*info = *m + j;
+		return 0;
+	    }
+/* L110: */
+	}
+    } else {
+
+/*        Invert the scale factors. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+/* Computing MAX */
+	    r__2 = c__[j];
+	    r__1 = f2cmax(r__2,smlnum);
+	    c__[j] = 1.f / f2cmin(r__1,bignum);
+/* L120: */
+	}
+
+/*        Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)). */
+
+	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+    return 0;
+
+/*     End of SGEEQUB */
+
+} /* sgeequb_ */
+
diff --git a/lapack-netlib/SRC/sgees.c b/lapack-netlib/SRC/sgees.c
new file mode 100644
index 000000000..2a4c81f5f
--- /dev/null
+++ b/lapack-netlib/SRC/sgees.c
@@ -0,0 +1,1001 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief <b> SGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors f
+or GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEES + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgees.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgees.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgees.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, */
+/*                         VS, LDVS, WORK, LWORK, BWORK, INFO ) */
+
+/*       CHARACTER          JOBVS, SORT */
+/*       INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM */
+/*       LOGICAL            BWORK( * ) */
+/*       REAL               A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */
+/*      $                   WR( * ) */
+/*       LOGICAL            SELECT */
+/*       EXTERNAL           SELECT */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEES computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */
+/* > Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T). */
+/* > */
+/* > Optionally, it also orders the eigenvalues on the diagonal of the */
+/* > real Schur form so that selected eigenvalues are at the top left. */
+/* > The leading columns of Z then form an orthonormal basis for the */
+/* > invariant subspace corresponding to the selected eigenvalues. */
+/* > */
+/* > A matrix is in real Schur form if it is upper quasi-triangular with */
+/* > 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the */
+/* > form */
+/* >         [  a  b  ] */
+/* >         [  c  a  ] */
+/* > */
+/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVS */
+/* > \verbatim */
+/* >          JOBVS is CHARACTER*1 */
+/* >          = 'N': Schur vectors are not computed; */
+/* >          = 'V': Schur vectors are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SORT */
+/* > \verbatim */
+/* >          SORT is CHARACTER*1 */
+/* >          Specifies whether or not to order the eigenvalues on the */
+/* >          diagonal of the Schur form. */
+/* >          = 'N': Eigenvalues are not ordered; */
+/* >          = 'S': Eigenvalues are ordered (see SELECT). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SELECT */
+/* > \verbatim */
+/* >          SELECT is a LOGICAL FUNCTION of two REAL arguments */
+/* >          SELECT must be declared EXTERNAL in the calling subroutine. */
+/* >          If SORT = 'S', SELECT is used to select eigenvalues to sort */
+/* >          to the top left of the Schur form. */
+/* >          If SORT = 'N', SELECT is not referenced. */
+/* >          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */
+/* >          SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex */
+/* >          conjugate pair of eigenvalues is selected, then both complex */
+/* >          eigenvalues are selected. */
+/* >          Note that a selected complex eigenvalue may no longer */
+/* >          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */
+/* >          ordering may change the value of complex eigenvalues */
+/* >          (especially if the eigenvalue is ill-conditioned); in this */
+/* >          case INFO is set to N+2 (see INFO below). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A has been overwritten by its real Schur form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SDIM */
+/* > \verbatim */
+/* >          SDIM is INTEGER */
+/* >          If SORT = 'N', SDIM = 0. */
+/* >          If SORT = 'S', SDIM = number of eigenvalues (after sorting) */
+/* >                         for which SELECT is true. (Complex conjugate */
+/* >                         pairs for which SELECT is true for either */
+/* >                         eigenvalue count as 2.) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is REAL array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, */
+/* >          respectively, of the computed eigenvalues in the same order */
+/* >          that they appear on the diagonal of the output Schur form T. */
+/* >          Complex conjugate pairs of eigenvalues will appear */
+/* >          consecutively with the eigenvalue having the positive */
+/* >          imaginary part first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VS */
+/* > \verbatim */
+/* >          VS is REAL array, dimension (LDVS,N) */
+/* >          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */
+/* >          vectors. */
+/* >          If JOBVS = 'N', VS is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVS */
+/* > \verbatim */
+/* >          LDVS is INTEGER */
+/* >          The leading dimension of the array VS.  LDVS >= 1; if */
+/* >          JOBVS = 'V', LDVS >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,3*N). */
+/* >          For good performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          Not referenced if SORT = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0: if INFO = i, and i is */
+/* >             <= N: the QR algorithm failed to compute all the */
+/* >                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */
+/* >                   contain those eigenvalues which have converged; if */
+/* >                   JOBVS = 'V', VS contains the matrix which reduces A */
+/* >                   to its partially converged Schur form. */
+/* >             = N+1: the eigenvalues could not be reordered because some */
+/* >                   eigenvalues were too close to separate (the problem */
+/* >                   is very ill-conditioned); */
+/* >             = N+2: after reordering, roundoff changed values of some */
+/* >                   complex eigenvalues so that leading eigenvalues in */
+/* >                   the Schur form no longer satisfy SELECT=.TRUE.  This */
+/* >                   could also be caused by underflow due to scaling. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgees_(char *jobvs, char *sort, L_fp select, integer *n, 
+	real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, 
+	integer *ldvs, real *work, integer *lwork, logical *bwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer ibal;
+    real anrm;
+    integer idum[1], ierr, itau, iwrk, inxt, i__;
+    real s;
+    integer icond, ieval;
+    extern logical lsame_(char *, char *);
+    logical cursl;
+    integer i1, i2;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *), sswap_(integer *, real *, integer *, real *, integer *
+	    );
+    logical lst2sl;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    logical scalea;
+    integer ip;
+    real cscale;
+    extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, 
+	    integer *, integer *, real *, integer *);
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), xerbla_(char 
+	    *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, 
+	    real *, integer *);
+    logical lastsl;
+    extern /* Subroutine */ int sorghr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), shseqr_(char 
+	    *, char *, integer *, integer *, integer *, real *, integer *, 
+	    real *, real *, real *, integer *, real *, integer *, integer *);
+    integer minwrk, maxwrk;
+    real smlnum;
+    integer hswork;
+    extern /* Subroutine */ int strsen_(char *, char *, logical *, integer *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *, 
+	    real *, real *, real *, integer *, integer *, integer *, integer *
+	    );
+    logical wantst, lquery, wantvs;
+    integer ihi, ilo;
+    real dum[1], eps, sep;
+
+
+/*  -- LAPACK driver routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vs_dim1 = *ldvs;
+    vs_offset = 1 + vs_dim1 * 1;
+    vs -= vs_offset;
+    --work;
+    --bwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvs = lsame_(jobvs, "V");
+    wantst = lsame_(sort, "S");
+    if (! wantvs && ! lsame_(jobvs, "N")) {
+	*info = -1;
+    } else if (! wantst && ! lsame_(sort, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldvs < 1 || wantvs && *ldvs < *n) {
+	*info = -11;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by SHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case.) */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    minwrk = 1;
+	    maxwrk = 1;
+	} else {
+	    maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, 
+		    n, &c__0, (ftnlen)6, (ftnlen)1);
+	    minwrk = *n * 3;
+
+	    shseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1]
+		    , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval);
+	    hswork = work[1];
+
+	    if (! wantvs) {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	}
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEES ", &i__1, (ftnlen)5);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	*sdim = 0;
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Permute the matrix to make it more nearly triangular */
+/*     (Workspace: need N) */
+
+    ibal = 1;
+    sgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*     Reduce to upper Hessenberg form */
+/*     (Workspace: need 3*N, prefer 2*N+N*NB) */
+
+    itau = *n + ibal;
+    iwrk = *n + itau;
+    i__1 = *lwork - iwrk + 1;
+    sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvs) {
+
+/*        Copy Householder vectors to VS */
+
+	slacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs)
+		;
+
+/*        Generate orthogonal matrix in VS */
+/*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+    }
+
+    *sdim = 0;
+
+/*     Perform QR iteration, accumulating Schur vectors in VS if desired */
+/*     (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+    iwrk = itau;
+    i__1 = *lwork - iwrk + 1;
+    shseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[
+	    vs_offset], ldvs, &work[iwrk], &i__1, &ieval);
+    if (ieval > 0) {
+	*info = ieval;
+    }
+
+/*     Sort eigenvalues if desired */
+
+    if (wantst && *info == 0) {
+	if (scalea) {
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, &
+		    ierr);
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, &
+		    ierr);
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    bwork[i__] = (*select)(&wr[i__], &wi[i__]);
+/* L10: */
+	}
+
+/*        Reorder eigenvalues and transform Schur vectors */
+/*        (Workspace: none needed) */
+
+	i__1 = *lwork - iwrk + 1;
+	strsen_("N", jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], 
+		ldvs, &wr[1], &wi[1], sdim, &s, &sep, &work[iwrk], &i__1, 
+		idum, &c__1, &icond);
+	if (icond > 0) {
+	    *info = *n + icond;
+	}
+    }
+
+    if (wantvs) {
+
+/*        Undo balancing */
+/*        (Workspace: need N) */
+
+	sgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs,
+		 &ierr);
+    }
+
+    if (scalea) {
+
+/*        Undo scaling for the Schur form of A */
+
+	slascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, &
+		ierr);
+	i__1 = *lda + 1;
+	scopy_(n, &a[a_offset], &i__1, &wr[1], &c__1);
+	if (cscale == smlnum) {
+
+/*           If scaling back towards underflow, adjust WI if an */
+/*           offdiagonal element of a 2-by-2 block in the Schur form */
+/*           underflows. */
+
+	    if (ieval > 0) {
+		i1 = ieval + 1;
+		i2 = ihi - 1;
+		i__1 = ilo - 1;
+/* Computing MAX */
+		i__3 = ilo - 1;
+		i__2 = f2cmax(i__3,1);
+		slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[
+			1], &i__2, &ierr);
+	    } else if (wantst) {
+		i1 = 1;
+		i2 = *n - 1;
+	    } else {
+		i1 = ilo;
+		i2 = ihi - 1;
+	    }
+	    inxt = i1 - 1;
+	    i__1 = i2;
+	    for (i__ = i1; i__ <= i__1; ++i__) {
+		if (i__ < inxt) {
+		    goto L20;
+		}
+		if (wi[i__] == 0.f) {
+		    inxt = i__ + 1;
+		} else {
+		    if (a[i__ + 1 + i__ * a_dim1] == 0.f) {
+			wi[i__] = 0.f;
+			wi[i__ + 1] = 0.f;
+		    } else if (a[i__ + 1 + i__ * a_dim1] != 0.f && a[i__ + (
+			    i__ + 1) * a_dim1] == 0.f) {
+			wi[i__] = 0.f;
+			wi[i__ + 1] = 0.f;
+			if (i__ > 1) {
+			    i__2 = i__ - 1;
+			    sswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[(
+				    i__ + 1) * a_dim1 + 1], &c__1);
+			}
+			if (*n > i__ + 1) {
+			    i__2 = *n - i__ - 1;
+			    sswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, &
+				    a[i__ + 1 + (i__ + 2) * a_dim1], lda);
+			}
+			if (wantvs) {
+			    sswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ 
+				    + 1) * vs_dim1 + 1], &c__1);
+			}
+			a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * 
+				a_dim1];
+			a[i__ + 1 + i__ * a_dim1] = 0.f;
+		    }
+		    inxt = i__ + 2;
+		}
+L20:
+		;
+	    }
+	}
+
+/*        Undo scaling for the imaginary part of the eigenvalues */
+
+	i__1 = *n - ieval;
+/* Computing MAX */
+	i__3 = *n - ieval;
+	i__2 = f2cmax(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + 
+		1], &i__2, &ierr);
+    }
+
+    if (wantst && *info == 0) {
+
+/*        Check if reordering successful */
+
+	lastsl = TRUE_;
+	lst2sl = TRUE_;
+	*sdim = 0;
+	ip = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    cursl = (*select)(&wr[i__], &wi[i__]);
+	    if (wi[i__] == 0.f) {
+		if (cursl) {
+		    ++(*sdim);
+		}
+		ip = 0;
+		if (cursl && ! lastsl) {
+		    *info = *n + 2;
+		}
+	    } else {
+		if (ip == 1) {
+
+/*                 Last eigenvalue of conjugate pair */
+
+		    cursl = cursl || lastsl;
+		    lastsl = cursl;
+		    if (cursl) {
+			*sdim += 2;
+		    }
+		    ip = -1;
+		    if (cursl && ! lst2sl) {
+			*info = *n + 2;
+		    }
+		} else {
+
+/*                 First eigenvalue of conjugate pair */
+
+		    ip = 1;
+		}
+	    }
+	    lst2sl = lastsl;
+	    lastsl = cursl;
+/* L30: */
+	}
+    }
+
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGEES */
+
+} /* sgees_ */
+
diff --git a/lapack-netlib/SRC/sgeesx.c b/lapack-netlib/SRC/sgeesx.c
new file mode 100644
index 000000000..ff8bffda6
--- /dev/null
+++ b/lapack-netlib/SRC/sgeesx.c
@@ -0,0 +1,1120 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief <b> SGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors 
+for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEESX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeesx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeesx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeesx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, */
+/*                          WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, */
+/*                          IWORK, LIWORK, BWORK, INFO ) */
+
+/*       CHARACTER          JOBVS, SENSE, SORT */
+/*       INTEGER            INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM */
+/*       REAL               RCONDE, RCONDV */
+/*       LOGICAL            BWORK( * ) */
+/*       INTEGER            IWORK( * ) */
+/*       REAL               A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */
+/*      $                   WR( * ) */
+/*       LOGICAL            SELECT */
+/*       EXTERNAL           SELECT */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEESX computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */
+/* > Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T). */
+/* > */
+/* > Optionally, it also orders the eigenvalues on the diagonal of the */
+/* > real Schur form so that selected eigenvalues are at the top left; */
+/* > computes a reciprocal condition number for the average of the */
+/* > selected eigenvalues (RCONDE); and computes a reciprocal condition */
+/* > number for the right invariant subspace corresponding to the */
+/* > selected eigenvalues (RCONDV).  The leading columns of Z form an */
+/* > orthonormal basis for this invariant subspace. */
+/* > */
+/* > For further explanation of the reciprocal condition numbers RCONDE */
+/* > and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where */
+/* > these quantities are called s and sep respectively). */
+/* > */
+/* > A real matrix is in real Schur form if it is upper quasi-triangular */
+/* > with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in */
+/* > the form */
+/* >           [  a  b  ] */
+/* >           [  c  a  ] */
+/* > */
+/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVS */
+/* > \verbatim */
+/* >          JOBVS is CHARACTER*1 */
+/* >          = 'N': Schur vectors are not computed; */
+/* >          = 'V': Schur vectors are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SORT */
+/* > \verbatim */
+/* >          SORT is CHARACTER*1 */
+/* >          Specifies whether or not to order the eigenvalues on the */
+/* >          diagonal of the Schur form. */
+/* >          = 'N': Eigenvalues are not ordered; */
+/* >          = 'S': Eigenvalues are ordered (see SELECT). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SELECT */
+/* > \verbatim */
+/* >          SELECT is a LOGICAL FUNCTION of two REAL arguments */
+/* >          SELECT must be declared EXTERNAL in the calling subroutine. */
+/* >          If SORT = 'S', SELECT is used to select eigenvalues to sort */
+/* >          to the top left of the Schur form. */
+/* >          If SORT = 'N', SELECT is not referenced. */
+/* >          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */
+/* >          SELECT(WR(j),WI(j)) is true; i.e., if either one of a */
+/* >          complex conjugate pair of eigenvalues is selected, then both */
+/* >          are.  Note that a selected complex eigenvalue may no longer */
+/* >          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */
+/* >          ordering may change the value of complex eigenvalues */
+/* >          (especially if the eigenvalue is ill-conditioned); in this */
+/* >          case INFO may be set to N+3 (see INFO below). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SENSE */
+/* > \verbatim */
+/* >          SENSE is CHARACTER*1 */
+/* >          Determines which reciprocal condition numbers are computed. */
+/* >          = 'N': None are computed; */
+/* >          = 'E': Computed for average of selected eigenvalues only; */
+/* >          = 'V': Computed for selected right invariant subspace only; */
+/* >          = 'B': Computed for both. */
+/* >          If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A is overwritten by its real Schur form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SDIM */
+/* > \verbatim */
+/* >          SDIM is INTEGER */
+/* >          If SORT = 'N', SDIM = 0. */
+/* >          If SORT = 'S', SDIM = number of eigenvalues (after sorting) */
+/* >                         for which SELECT is true. (Complex conjugate */
+/* >                         pairs for which SELECT is true for either */
+/* >                         eigenvalue count as 2.) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is REAL array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, respectively, */
+/* >          of the computed eigenvalues, in the same order that they */
+/* >          appear on the diagonal of the output Schur form T.  Complex */
+/* >          conjugate pairs of eigenvalues appear consecutively with the */
+/* >          eigenvalue having the positive imaginary part first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VS */
+/* > \verbatim */
+/* >          VS is REAL array, dimension (LDVS,N) */
+/* >          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */
+/* >          vectors. */
+/* >          If JOBVS = 'N', VS is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVS */
+/* > \verbatim */
+/* >          LDVS is INTEGER */
+/* >          The leading dimension of the array VS.  LDVS >= 1, and if */
+/* >          JOBVS = 'V', LDVS >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDE */
+/* > \verbatim */
+/* >          RCONDE is REAL */
+/* >          If SENSE = 'E' or 'B', RCONDE contains the reciprocal */
+/* >          condition number for the average of the selected eigenvalues. */
+/* >          Not referenced if SENSE = 'N' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDV */
+/* > \verbatim */
+/* >          RCONDV is REAL */
+/* >          If SENSE = 'V' or 'B', RCONDV contains the reciprocal */
+/* >          condition number for the selected right invariant subspace. */
+/* >          Not referenced if SENSE = 'N' or 'E'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,3*N). */
+/* >          Also, if SENSE = 'E' or 'V' or 'B', */
+/* >          LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of */
+/* >          selected eigenvalues computed by this routine.  Note that */
+/* >          N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only */
+/* >          returned if LWORK < f2cmax(1,3*N), but if SENSE = 'E' or 'V' or */
+/* >          'B' this may not be large enough. */
+/* >          For good performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates upper bounds on the optimal sizes of the */
+/* >          arrays WORK and IWORK, returns these values as the first */
+/* >          entries of the WORK and IWORK arrays, and no error messages */
+/* >          related to LWORK or LIWORK are issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
+/* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LIWORK */
+/* > \verbatim */
+/* >          LIWORK is INTEGER */
+/* >          The dimension of the array IWORK. */
+/* >          LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). */
+/* >          Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is */
+/* >          only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this */
+/* >          may not be large enough. */
+/* > */
+/* >          If LIWORK = -1, then a workspace query is assumed; the */
+/* >          routine only calculates upper bounds on the optimal sizes of */
+/* >          the arrays WORK and IWORK, returns these values as the first */
+/* >          entries of the WORK and IWORK arrays, and no error messages */
+/* >          related to LWORK or LIWORK are issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          Not referenced if SORT = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0: if INFO = i, and i is */
+/* >             <= N: the QR algorithm failed to compute all the */
+/* >                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */
+/* >                   contain those eigenvalues which have converged; if */
+/* >                   JOBVS = 'V', VS contains the transformation which */
+/* >                   reduces A to its partially converged Schur form. */
+/* >             = N+1: the eigenvalues could not be reordered because some */
+/* >                   eigenvalues were too close to separate (the problem */
+/* >                   is very ill-conditioned); */
+/* >             = N+2: after reordering, roundoff changed values of some */
+/* >                   complex eigenvalues so that leading eigenvalues in */
+/* >                   the Schur form no longer satisfy SELECT=.TRUE.  This */
+/* >                   could also be caused by underflow due to scaling. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgeesx_(char *jobvs, char *sort, L_fp select, char *
+	sense, integer *n, real *a, integer *lda, integer *sdim, real *wr, 
+	real *wi, real *vs, integer *ldvs, real *rconde, real *rcondv, real *
+	work, integer *lwork, integer *iwork, integer *liwork, logical *bwork,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer ibal;
+    real anrm;
+    integer ierr, itau, iwrk, lwrk, inxt, i__, icond, ieval;
+    extern logical lsame_(char *, char *);
+    logical cursl;
+    integer liwrk, i1, i2;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *), sswap_(integer *, real *, integer *, real *, integer *
+	    );
+    logical lst2sl;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    logical scalea;
+    integer ip;
+    real cscale;
+    extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, 
+	    integer *, integer *, real *, integer *);
+    extern real slamch_(char *);
+    extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), xerbla_(char 
+	    *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern real slange_(char *, integer *, integer *, real *, integer *, real 
+	    *);
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, 
+	    real *, integer *);
+    logical wantsb, wantse, lastsl;
+    extern /* Subroutine */ int sorghr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), shseqr_(char 
+	    *, char *, integer *, integer *, integer *, real *, integer *, 
+	    real *, real *, real *, integer *, real *, integer *, integer *);
+    integer minwrk, maxwrk;
+    logical wantsn;
+    real smlnum;
+    integer hswork;
+    extern /* Subroutine */ int strsen_(char *, char *, logical *, integer *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *, 
+	    real *, real *, real *, integer *, integer *, integer *, integer *
+	    );
+    logical wantst, lquery, wantsv, wantvs;
+    integer ihi, ilo;
+    real dum[1], eps;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vs_dim1 = *ldvs;
+    vs_offset = 1 + vs_dim1 * 1;
+    vs -= vs_offset;
+    --work;
+    --iwork;
+    --bwork;
+
+    /* Function Body */
+    *info = 0;
+    wantvs = lsame_(jobvs, "V");
+    wantst = lsame_(sort, "S");
+    wantsn = lsame_(sense, "N");
+    wantse = lsame_(sense, "E");
+    wantsv = lsame_(sense, "V");
+    wantsb = lsame_(sense, "B");
+    lquery = *lwork == -1 || *liwork == -1;
+
+    if (! wantvs && ! lsame_(jobvs, "N")) {
+	*info = -1;
+    } else if (! wantst && ! lsame_(sort, "N")) {
+	*info = -2;
+    } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! 
+	    wantsn) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldvs < 1 || wantvs && *ldvs < *n) {
+	*info = -12;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "RWorkspace:" describe the */
+/*       minimal amount of real workspace needed at that point in the */
+/*       code, as well as the preferred amount for good performance. */
+/*       IWorkspace refers to integer workspace. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by SHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case. */
+/*       If SENSE = 'E', 'V' or 'B', then the amount of workspace needed */
+/*       depends on SDIM, which is computed by the routine STRSEN later */
+/*       in the code.) */
+
+    if (*info == 0) {
+	liwrk = 1;
+	if (*n == 0) {
+	    minwrk = 1;
+	    lwrk = 1;
+	} else {
+	    maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, 
+		    n, &c__0, (ftnlen)6, (ftnlen)1);
+	    minwrk = *n * 3;
+
+	    shseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1]
+		    , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval);
+	    hswork = work[1];
+
+	    if (! wantvs) {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    lwrk = maxwrk;
+	    if (! wantsn) {
+/* Computing MAX */
+		i__1 = lwrk, i__2 = *n + *n * *n / 2;
+		lwrk = f2cmax(i__1,i__2);
+	    }
+	    if (wantsv || wantsb) {
+		liwrk = *n * *n / 4;
+	    }
+	}
+	iwork[1] = liwrk;
+	work[1] = (real) lwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -16;
+	} else if (*liwork < 1 && ! lquery) {
+	    *info = -18;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEESX", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	*sdim = 0;
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Permute the matrix to make it more nearly triangular */
+/*     (RWorkspace: need N) */
+
+    ibal = 1;
+    sgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*     Reduce to upper Hessenberg form */
+/*     (RWorkspace: need 3*N, prefer 2*N+N*NB) */
+
+    itau = *n + ibal;
+    iwrk = *n + itau;
+    i__1 = *lwork - iwrk + 1;
+    sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvs) {
+
+/*        Copy Householder vectors to VS */
+
+	slacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs)
+		;
+
+/*        Generate orthogonal matrix in VS */
+/*        (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+    }
+
+    *sdim = 0;
+
+/*     Perform QR iteration, accumulating Schur vectors in VS if desired */
+/*     (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+    iwrk = itau;
+    i__1 = *lwork - iwrk + 1;
+    shseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[
+	    vs_offset], ldvs, &work[iwrk], &i__1, &ieval);
+    if (ieval > 0) {
+	*info = ieval;
+    }
+
+/*     Sort eigenvalues if desired */
+
+    if (wantst && *info == 0) {
+	if (scalea) {
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, &
+		    ierr);
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, &
+		    ierr);
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    bwork[i__] = (*select)(&wr[i__], &wi[i__]);
+/* L10: */
+	}
+
+/*        Reorder eigenvalues, transform Schur vectors, and compute */
+/*        reciprocal condition numbers */
+/*        (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) */
+/*                     otherwise, need N ) */
+/*        (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) */
+/*                     otherwise, need 0 ) */
+
+	i__1 = *lwork - iwrk + 1;
+	strsen_(sense, jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset],
+		 ldvs, &wr[1], &wi[1], sdim, rconde, rcondv, &work[iwrk], &
+		i__1, &iwork[1], liwork, &icond);
+	if (! wantsn) {
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + (*sdim << 1) * (*n - *sdim);
+	    maxwrk = f2cmax(i__1,i__2);
+	}
+	if (icond == -15) {
+
+/*           Not enough real workspace */
+
+	    *info = -16;
+	} else if (icond == -17) {
+
+/*           Not enough integer workspace */
+
+	    *info = -18;
+	} else if (icond > 0) {
+
+/*           STRSEN failed to reorder or to restore standard Schur form */
+
+	    *info = icond + *n;
+	}
+    }
+
+    if (wantvs) {
+
+/*        Undo balancing */
+/*        (RWorkspace: need N) */
+
+	sgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs,
+		 &ierr);
+    }
+
+    if (scalea) {
+
+/*        Undo scaling for the Schur form of A */
+
+	slascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, &
+		ierr);
+	i__1 = *lda + 1;
+	scopy_(n, &a[a_offset], &i__1, &wr[1], &c__1);
+	if ((wantsv || wantsb) && *info == 0) {
+	    dum[0] = *rcondv;
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &
+		    c__1, &ierr);
+	    *rcondv = dum[0];
+	}
+	if (cscale == smlnum) {
+
+/*           If scaling back towards underflow, adjust WI if an */
+/*           offdiagonal element of a 2-by-2 block in the Schur form */
+/*           underflows. */
+
+	    if (ieval > 0) {
+		i1 = ieval + 1;
+		i2 = ihi - 1;
+		i__1 = ilo - 1;
+		slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[
+			1], n, &ierr);
+	    } else if (wantst) {
+		i1 = 1;
+		i2 = *n - 1;
+	    } else {
+		i1 = ilo;
+		i2 = ihi - 1;
+	    }
+	    inxt = i1 - 1;
+	    i__1 = i2;
+	    for (i__ = i1; i__ <= i__1; ++i__) {
+		if (i__ < inxt) {
+		    goto L20;
+		}
+		if (wi[i__] == 0.f) {
+		    inxt = i__ + 1;
+		} else {
+		    if (a[i__ + 1 + i__ * a_dim1] == 0.f) {
+			wi[i__] = 0.f;
+			wi[i__ + 1] = 0.f;
+		    } else if (a[i__ + 1 + i__ * a_dim1] != 0.f && a[i__ + (
+			    i__ + 1) * a_dim1] == 0.f) {
+			wi[i__] = 0.f;
+			wi[i__ + 1] = 0.f;
+			if (i__ > 1) {
+			    i__2 = i__ - 1;
+			    sswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[(
+				    i__ + 1) * a_dim1 + 1], &c__1);
+			}
+			if (*n > i__ + 1) {
+			    i__2 = *n - i__ - 1;
+			    sswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, &
+				    a[i__ + 1 + (i__ + 2) * a_dim1], lda);
+			}
+			if (wantvs) {
+			    sswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ 
+				    + 1) * vs_dim1 + 1], &c__1);
+			}
+			a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * 
+				a_dim1];
+			a[i__ + 1 + i__ * a_dim1] = 0.f;
+		    }
+		    inxt = i__ + 2;
+		}
+L20:
+		;
+	    }
+	}
+	i__1 = *n - ieval;
+/* Computing MAX */
+	i__3 = *n - ieval;
+	i__2 = f2cmax(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + 
+		1], &i__2, &ierr);
+    }
+
+    if (wantst && *info == 0) {
+
+/*        Check if reordering successful */
+
+	lastsl = TRUE_;
+	lst2sl = TRUE_;
+	*sdim = 0;
+	ip = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    cursl = (*select)(&wr[i__], &wi[i__]);
+	    if (wi[i__] == 0.f) {
+		if (cursl) {
+		    ++(*sdim);
+		}
+		ip = 0;
+		if (cursl && ! lastsl) {
+		    *info = *n + 2;
+		}
+	    } else {
+		if (ip == 1) {
+
+/*                 Last eigenvalue of conjugate pair */
+
+		    cursl = cursl || lastsl;
+		    lastsl = cursl;
+		    if (cursl) {
+			*sdim += 2;
+		    }
+		    ip = -1;
+		    if (cursl && ! lst2sl) {
+			*info = *n + 2;
+		    }
+		} else {
+
+/*                 First eigenvalue of conjugate pair */
+
+		    ip = 1;
+		}
+	    }
+	    lst2sl = lastsl;
+	    lastsl = cursl;
+/* L30: */
+	}
+    }
+
+    work[1] = (real) maxwrk;
+    if (wantsv || wantsb) {
+	iwork[1] = *sdim * (*n - *sdim);
+    } else {
+	iwork[1] = 1;
+    }
+
+    return 0;
+
+/*     End of SGEESX */
+
+} /* sgeesx_ */
+
diff --git a/lapack-netlib/SRC/sgeev.c b/lapack-netlib/SRC/sgeev.c
new file mode 100644
index 000000000..626b1d577
--- /dev/null
+++ b/lapack-netlib/SRC/sgeev.c
@@ -0,0 +1,1029 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief <b> SGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matr
+ices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEEV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeev.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeev.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeev.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, */
+/*                         LDVR, WORK, LWORK, INFO ) */
+
+/*       CHARACTER          JOBVL, JOBVR */
+/*       INTEGER            INFO, LDA, LDVL, LDVR, LWORK, N */
+/*       REAL   A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), */
+/*      $                   WI( * ), WORK( * ), WR( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEEV computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues and, optionally, the left and/or right eigenvectors. */
+/* > */
+/* > The right eigenvector v(j) of A satisfies */
+/* >                  A * v(j) = lambda(j) * v(j) */
+/* > where lambda(j) is its eigenvalue. */
+/* > The left eigenvector u(j) of A satisfies */
+/* >               u(j)**H * A = lambda(j) * u(j)**H */
+/* > where u(j)**H denotes the conjugate-transpose of u(j). */
+/* > */
+/* > The computed eigenvectors are normalized to have Euclidean norm */
+/* > equal to 1 and largest component real. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVL */
+/* > \verbatim */
+/* >          JOBVL is CHARACTER*1 */
+/* >          = 'N': left eigenvectors of A are not computed; */
+/* >          = 'V': left eigenvectors of A are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVR */
+/* > \verbatim */
+/* >          JOBVR is CHARACTER*1 */
+/* >          = 'N': right eigenvectors of A are not computed; */
+/* >          = 'V': right eigenvectors of A are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A has been overwritten. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is REAL array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, */
+/* >          respectively, of the computed eigenvalues.  Complex */
+/* >          conjugate pairs of eigenvalues appear consecutively */
+/* >          with the eigenvalue having the positive imaginary part */
+/* >          first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VL */
+/* > \verbatim */
+/* >          VL is REAL array, dimension (LDVL,N) */
+/* >          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
+/* >          after another in the columns of VL, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVL = 'N', VL is not referenced. */
+/* >          If the j-th eigenvalue is real, then u(j) = VL(:,j), */
+/* >          the j-th column of VL. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */
+/* >          u(j+1) = VL(:,j) - i*VL(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the array VL.  LDVL >= 1; if */
+/* >          JOBVL = 'V', LDVL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VR */
+/* > \verbatim */
+/* >          VR is REAL array, dimension (LDVR,N) */
+/* >          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
+/* >          after another in the columns of VR, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVR = 'N', VR is not referenced. */
+/* >          If the j-th eigenvalue is real, then v(j) = VR(:,j), */
+/* >          the j-th column of VR. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */
+/* >          v(j+1) = VR(:,j) - i*VR(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the array VR.  LDVR >= 1; if */
+/* >          JOBVR = 'V', LDVR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,3*N), and */
+/* >          if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good */
+/* >          performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if INFO = i, the QR algorithm failed to compute all the */
+/* >                eigenvalues, and no eigenvectors have been computed; */
+/* >                elements i+1:N of WR and WI contain eigenvalues which */
+/* >                have converged. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/*  @generated from dgeev.f, fortran d -> s, Tue Apr 19 01:47:44 2016 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, integer *n, real *a, 
+	integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, 
+	integer *ldvr, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
+	    i__2, i__3;
+    real r__1, r__2;
+
+    /* Local variables */
+    integer ibal;
+    char side[1];
+    real anrm;
+    integer ierr, itau, iwrk, nout;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
+	    integer *, real *, real *);
+    extern real snrm2_(integer *, real *, integer *);
+    integer i__, k;
+    real r__;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    extern real slapy2_(real *, real *);
+    real cs;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    logical scalea;
+    real cscale;
+    extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, 
+	    integer *, integer *, real *, integer *);
+    real sn;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), xerbla_(char 
+	    *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    logical select[1];
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), slartg_(real *, real *, 
+	    real *, real *, real *), sorghr_(integer *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *, integer *), shseqr_(
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, real *, real *, integer *, real *, integer *, integer *);
+    integer minwrk, maxwrk;
+    logical wantvl;
+    real smlnum;
+    integer hswork;
+    logical lquery, wantvr;
+    extern /* Subroutine */ int strevc3_(char *, char *, logical *, integer *,
+	     real *, integer *, real *, integer *, real *, integer *, integer 
+	    *, integer *, real *, integer *, integer *);
+    integer ihi;
+    real scl;
+    integer ilo;
+    real dum[1], eps;
+    integer lwork_trevc__;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -1;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldvl < 1 || wantvl && *ldvl < *n) {
+	*info = -9;
+    } else if (*ldvr < 1 || wantvr && *ldvr < *n) {
+	*info = -11;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by SHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case.) */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    minwrk = 1;
+	    maxwrk = 1;
+	} else {
+	    maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, 
+		    n, &c__0, (ftnlen)6, (ftnlen)1);
+	    if (wantvl) {
+		minwrk = *n << 2;
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+		shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vl[vl_offset], ldvl, &work[1], &c_n1, info);
+		hswork = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = *
+			n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+		strevc3_("L", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n << 2;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else if (wantvr) {
+		minwrk = *n << 2;
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+		shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vr[vr_offset], ldvr, &work[1], &c_n1, info);
+		hswork = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = *
+			n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+		strevc3_("R", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n << 2;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else {
+		minwrk = *n * 3;
+		shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vr[vr_offset], ldvr, &work[1], &c_n1, info);
+		hswork = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = *
+			n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    maxwrk = f2cmax(maxwrk,minwrk);
+	}
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEEV ", &i__1, (ftnlen)5);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Balance the matrix */
+/*     (Workspace: need N) */
+
+    ibal = 1;
+    sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*     Reduce to upper Hessenberg form */
+/*     (Workspace: need 3*N, prefer 2*N+N*NB) */
+
+    itau = ibal + *n;
+    iwrk = itau + *n;
+    i__1 = *lwork - iwrk + 1;
+    sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvl) {
+
+/*        Want left eigenvectors */
+/*        Copy Householder vectors to VL */
+
+	*(unsigned char *)side = 'L';
+	slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*        Generate orthogonal matrix in VL */
+/*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VL */
+/*        (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vl[vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*           Want left and right eigenvectors */
+/*           Copy Schur vectors to VR */
+
+	    *(unsigned char *)side = 'B';
+	    slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*        Want right eigenvectors */
+/*        Copy Householder vectors to VR */
+
+	*(unsigned char *)side = 'R';
+	slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*        Generate orthogonal matrix in VR */
+/*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VR */
+/*        (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*        Compute eigenvalues only */
+/*        (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO .NE. 0 from SHSEQR, then quit */
+
+    if (*info != 0) {
+	goto L50;
+    }
+
+    if (wantvl || wantvr) {
+
+/*        Compute left and/or right eigenvectors */
+/*        (Workspace: need 4*N, prefer N + N + 2*N*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	strevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], 
+		ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, &
+		ierr);
+    }
+
+    if (wantvl) {
+
+/*        Undo balancing of left eigenvectors */
+/*        (Workspace: need N) */
+
+	sgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl,
+		 &ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.f) {
+		scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.f) {
+		r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		scl = 1.f / slapy2_(&r__1, &r__2);
+		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    r__1 = vl[k + i__ * vl_dim1];
+/* Computing 2nd power */
+		    r__2 = vl[k + (i__ + 1) * vl_dim1];
+		    work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2;
+/* L10: */
+		}
+		k = isamax_(n, &work[iwrk], &c__1);
+		slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], 
+			&cs, &sn, &r__);
+		srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * 
+			vl_dim1 + 1], &c__1, &cs, &sn);
+		vl[k + (i__ + 1) * vl_dim1] = 0.f;
+	    }
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*        Undo balancing of right eigenvectors */
+/*        (Workspace: need N) */
+
+	sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr,
+		 &ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.f) {
+		scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.f) {
+		r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		scl = 1.f / slapy2_(&r__1, &r__2);
+		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    r__1 = vr[k + i__ * vr_dim1];
+/* Computing 2nd power */
+		    r__2 = vr[k + (i__ + 1) * vr_dim1];
+		    work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2;
+/* L30: */
+		}
+		k = isamax_(n, &work[iwrk], &c__1);
+		slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], 
+			&cs, &sn, &r__);
+		srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * 
+			vr_dim1 + 1], &c__1, &cs, &sn);
+		vr[k + (i__ + 1) * vr_dim1] = 0.f;
+	    }
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 
+		1], &i__2, &ierr);
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 
+		1], &i__2, &ierr);
+	if (*info > 0) {
+	    i__1 = ilo - 1;
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], 
+		    n, &ierr);
+	    i__1 = ilo - 1;
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], 
+		    n, &ierr);
+	}
+    }
+
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGEEV */
+
+} /* sgeev_ */
+
diff --git a/lapack-netlib/SRC/sgeevx.c b/lapack-netlib/SRC/sgeevx.c
new file mode 100644
index 000000000..89648a794
--- /dev/null
+++ b/lapack-netlib/SRC/sgeevx.c
@@ -0,0 +1,1197 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief <b> SGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat
+rices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEEVX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeevx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeevx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeevx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, */
+/*                          VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, */
+/*                          RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) */
+
+/*       CHARACTER          BALANC, JOBVL, JOBVR, SENSE */
+/*       INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N */
+/*       REAL               ABNRM */
+/*       INTEGER            IWORK( * ) */
+/*       REAL               A( LDA, * ), RCONDE( * ), RCONDV( * ), */
+/*      $                   SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), */
+/*      $                   WI( * ), WORK( * ), WR( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEEVX computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues and, optionally, the left and/or right eigenvectors. */
+/* > */
+/* > Optionally also, it computes a balancing transformation to improve */
+/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */
+/* > SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */
+/* > (RCONDE), and reciprocal condition numbers for the right */
+/* > eigenvectors (RCONDV). */
+/* > */
+/* > The right eigenvector v(j) of A satisfies */
+/* >                  A * v(j) = lambda(j) * v(j) */
+/* > where lambda(j) is its eigenvalue. */
+/* > The left eigenvector u(j) of A satisfies */
+/* >               u(j)**H * A = lambda(j) * u(j)**H */
+/* > where u(j)**H denotes the conjugate-transpose of u(j). */
+/* > */
+/* > The computed eigenvectors are normalized to have Euclidean norm */
+/* > equal to 1 and largest component real. */
+/* > */
+/* > Balancing a matrix means permuting the rows and columns to make it */
+/* > more nearly upper triangular, and applying a diagonal similarity */
+/* > transformation D * A * D**(-1), where D is a diagonal matrix, to */
+/* > make its rows and columns closer in norm and the condition numbers */
+/* > of its eigenvalues and eigenvectors smaller.  The computed */
+/* > reciprocal condition numbers correspond to the balanced matrix. */
+/* > Permuting rows and columns will not change the condition numbers */
+/* > (in exact arithmetic) but diagonal scaling will.  For further */
+/* > explanation of balancing, see section 4.10.2 of the LAPACK */
+/* > Users' Guide. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] BALANC */
+/* > \verbatim */
+/* >          BALANC is CHARACTER*1 */
+/* >          Indicates how the input matrix should be diagonally scaled */
+/* >          and/or permuted to improve the conditioning of its */
+/* >          eigenvalues. */
+/* >          = 'N': Do not diagonally scale or permute; */
+/* >          = 'P': Perform permutations to make the matrix more nearly */
+/* >                 upper triangular. Do not diagonally scale; */
+/* >          = 'S': Diagonally scale the matrix, i.e. replace A by */
+/* >                 D*A*D**(-1), where D is a diagonal matrix chosen */
+/* >                 to make the rows and columns of A more equal in */
+/* >                 norm. Do not permute; */
+/* >          = 'B': Both diagonally scale and permute A. */
+/* > */
+/* >          Computed reciprocal condition numbers will be for the matrix */
+/* >          after balancing and/or permuting. Permuting does not change */
+/* >          condition numbers (in exact arithmetic), but balancing does. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVL */
+/* > \verbatim */
+/* >          JOBVL is CHARACTER*1 */
+/* >          = 'N': left eigenvectors of A are not computed; */
+/* >          = 'V': left eigenvectors of A are computed. */
+/* >          If SENSE = 'E' or 'B', JOBVL must = 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVR */
+/* > \verbatim */
+/* >          JOBVR is CHARACTER*1 */
+/* >          = 'N': right eigenvectors of A are not computed; */
+/* >          = 'V': right eigenvectors of A are computed. */
+/* >          If SENSE = 'E' or 'B', JOBVR must = 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SENSE */
+/* > \verbatim */
+/* >          SENSE is CHARACTER*1 */
+/* >          Determines which reciprocal condition numbers are computed. */
+/* >          = 'N': None are computed; */
+/* >          = 'E': Computed for eigenvalues only; */
+/* >          = 'V': Computed for right eigenvectors only; */
+/* >          = 'B': Computed for eigenvalues and right eigenvectors. */
+/* > */
+/* >          If SENSE = 'E' or 'B', both left and right eigenvectors */
+/* >          must also be computed (JOBVL = 'V' and JOBVR = 'V'). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A has been overwritten.  If JOBVL = 'V' or */
+/* >          JOBVR = 'V', A contains the real Schur form of the balanced */
+/* >          version of the input matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is REAL array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, */
+/* >          respectively, of the computed eigenvalues.  Complex */
+/* >          conjugate pairs of eigenvalues will appear consecutively */
+/* >          with the eigenvalue having the positive imaginary part */
+/* >          first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VL */
+/* > \verbatim */
+/* >          VL is REAL array, dimension (LDVL,N) */
+/* >          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
+/* >          after another in the columns of VL, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVL = 'N', VL is not referenced. */
+/* >          If the j-th eigenvalue is real, then u(j) = VL(:,j), */
+/* >          the j-th column of VL. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */
+/* >          u(j+1) = VL(:,j) - i*VL(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the array VL.  LDVL >= 1; if */
+/* >          JOBVL = 'V', LDVL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VR */
+/* > \verbatim */
+/* >          VR is REAL array, dimension (LDVR,N) */
+/* >          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
+/* >          after another in the columns of VR, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVR = 'N', VR is not referenced. */
+/* >          If the j-th eigenvalue is real, then v(j) = VR(:,j), */
+/* >          the j-th column of VR. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */
+/* >          v(j+1) = VR(:,j) - i*VR(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the array VR.  LDVR >= 1, and if */
+/* >          JOBVR = 'V', LDVR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          ILO and IHI are integer values determined when A was */
+/* >          balanced.  The balanced A(i,j) = 0 if I > J and */
+/* >          J = 1,...,ILO-1 or I = IHI+1,...,N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SCALE */
+/* > \verbatim */
+/* >          SCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied */
+/* >          when balancing A.  If P(j) is the index of the row and column */
+/* >          interchanged with row and column j, and D(j) is the scaling */
+/* >          factor applied to row and column j, then */
+/* >          SCALE(J) = P(J),    for J = 1,...,ILO-1 */
+/* >                   = D(J),    for J = ILO,...,IHI */
+/* >                   = P(J)     for J = IHI+1,...,N. */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ABNRM */
+/* > \verbatim */
+/* >          ABNRM is REAL */
+/* >          The one-norm of the balanced matrix (the maximum */
+/* >          of the sum of absolute values of elements of any column). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDE */
+/* > \verbatim */
+/* >          RCONDE is REAL array, dimension (N) */
+/* >          RCONDE(j) is the reciprocal condition number of the j-th */
+/* >          eigenvalue. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDV */
+/* > \verbatim */
+/* >          RCONDV is REAL array, dimension (N) */
+/* >          RCONDV(j) is the reciprocal condition number of the j-th */
+/* >          right eigenvector. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.   If SENSE = 'N' or 'E', */
+/* >          LWORK >= f2cmax(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */
+/* >          LWORK >= 3*N.  If SENSE = 'V' or 'B', LWORK >= N*(N+6). */
+/* >          For good performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (2*N-2) */
+/* >          If SENSE = 'N' or 'E', not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if INFO = i, the QR algorithm failed to compute all the */
+/* >                eigenvalues, and no eigenvectors or condition numbers */
+/* >                have been computed; elements 1:ILO-1 and i+1:N of WR */
+/* >                and WI contain eigenvalues which have converged. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/*  @generated from dgeevx.f, fortran d -> s, Tue Apr 19 01:47:44 2016 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgeevx_(char *balanc, char *jobvl, char *jobvr, char *
+	sense, integer *n, real *a, integer *lda, real *wr, real *wi, real *
+	vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer *
+	ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work,
+	 integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
+	    i__2, i__3;
+    real r__1, r__2;
+
+    /* Local variables */
+    char side[1];
+    real anrm;
+    integer ierr, itau, iwrk, nout;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
+	    integer *, real *, real *);
+    extern real snrm2_(integer *, real *, integer *);
+    integer i__, k;
+    real r__;
+    integer icond;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    extern real slapy2_(real *, real *);
+    real cs;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    logical scalea;
+    real cscale;
+    extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, 
+	    integer *, integer *, real *, integer *);
+    real sn;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), xerbla_(char 
+	    *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    logical select[1];
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), slartg_(real *, real *, 
+	    real *, real *, real *), sorghr_(integer *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *, integer *), shseqr_(
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, real *, real *, integer *, real *, integer *, integer *);
+    integer minwrk, maxwrk;
+    extern /* Subroutine */ int strsna_(char *, char *, logical *, integer *, 
+	    real *, integer *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *, 
+	    integer *);
+    logical wantvl, wntsnb;
+    integer hswork;
+    logical wntsne;
+    real smlnum;
+    logical lquery, wantvr, wntsnn, wntsnv;
+    extern /* Subroutine */ int strevc3_(char *, char *, logical *, integer *,
+	     real *, integer *, real *, integer *, real *, integer *, integer 
+	    *, integer *, real *, integer *, integer *);
+    char job[1];
+    real scl, dum[1], eps;
+    integer lwork_trevc__;
+
+
+/*  -- LAPACK driver routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --scale;
+    --rconde;
+    --rcondv;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    wntsnn = lsame_(sense, "N");
+    wntsne = lsame_(sense, "E");
+    wntsnv = lsame_(sense, "V");
+    wntsnb = lsame_(sense, "B");
+    if (! (lsame_(balanc, "N") || lsame_(balanc, "S") || lsame_(balanc, "P") 
+	    || lsame_(balanc, "B"))) {
+	*info = -1;
+    } else if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -2;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -3;
+    } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) 
+	    && ! (wantvl && wantvr)) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldvl < 1 || wantvl && *ldvl < *n) {
+	*info = -11;
+    } else if (*ldvr < 1 || wantvr && *ldvr < *n) {
+	*info = -13;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by SHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case.) */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    minwrk = 1;
+	    maxwrk = 1;
+	} else {
+	    maxwrk = *n + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, &
+		    c__0, (ftnlen)6, (ftnlen)1);
+
+	    if (wantvl) {
+		strevc3_("L", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+		shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vl[vl_offset], ldvl, &work[1], &c_n1, info);
+	    } else if (wantvr) {
+		strevc3_("R", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+		shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vr[vr_offset], ldvr, &work[1], &c_n1, info);
+	    } else {
+		if (wntsnn) {
+		    shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], 
+			    &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, 
+			    info);
+		} else {
+		    shseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], 
+			    &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, 
+			    info);
+		}
+	    }
+	    hswork = (integer) work[1];
+
+	    if (! wantvl && ! wantvr) {
+		minwrk = *n << 1;
+		if (! wntsnn) {
+/* Computing MAX */
+		    i__1 = minwrk, i__2 = *n * *n + *n * 6;
+		    minwrk = f2cmax(i__1,i__2);
+		}
+		maxwrk = f2cmax(maxwrk,hswork);
+		if (! wntsnn) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *n * *n + *n * 6;
+		    maxwrk = f2cmax(i__1,i__2);
+		}
+	    } else {
+		minwrk = *n * 3;
+		if (! wntsnn && ! wntsne) {
+/* Computing MAX */
+		    i__1 = minwrk, i__2 = *n * *n + *n * 6;
+		    minwrk = f2cmax(i__1,i__2);
+		}
+		maxwrk = f2cmax(maxwrk,hswork);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "SORGHR",
+			 " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		maxwrk = f2cmax(i__1,i__2);
+		if (! wntsnn && ! wntsne) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *n * *n + *n * 6;
+		    maxwrk = f2cmax(i__1,i__2);
+		}
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    maxwrk = f2cmax(maxwrk,minwrk);
+	}
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -21;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEEVX", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    icond = 0;
+    anrm = slange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Balance the matrix and compute ABNRM */
+
+    sgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr);
+    *abnrm = slange_("1", n, n, &a[a_offset], lda, dum);
+    if (scalea) {
+	dum[0] = *abnrm;
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, &
+		ierr);
+	*abnrm = dum[0];
+    }
+
+/*     Reduce to upper Hessenberg form */
+/*     (Workspace: need 2*N, prefer N+N*NB) */
+
+    itau = 1;
+    iwrk = itau + *n;
+    i__1 = *lwork - iwrk + 1;
+    sgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &
+	    ierr);
+
+    if (wantvl) {
+
+/*        Want left eigenvectors */
+/*        Copy Householder vectors to VL */
+
+	*(unsigned char *)side = 'L';
+	slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*        Generate orthogonal matrix in VL */
+/*        (Workspace: need 2*N-1, prefer N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], &
+		i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VL */
+/*        (Workspace: need 1, prefer HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[
+		vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*           Want left and right eigenvectors */
+/*           Copy Schur vectors to VR */
+
+	    *(unsigned char *)side = 'B';
+	    slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*        Want right eigenvectors */
+/*        Copy Householder vectors to VR */
+
+	*(unsigned char *)side = 'R';
+	slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*        Generate orthogonal matrix in VR */
+/*        (Workspace: need 2*N-1, prefer N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &
+		i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VR */
+/*        (Workspace: need 1, prefer HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*        Compute eigenvalues only */
+/*        If condition numbers desired, compute Schur form */
+
+	if (wntsnn) {
+	    *(unsigned char *)job = 'E';
+	} else {
+	    *(unsigned char *)job = 'S';
+	}
+
+/*        (Workspace: need 1, prefer HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO .NE. 0 from SHSEQR, then quit */
+
+    if (*info != 0) {
+	goto L50;
+    }
+
+    if (wantvl || wantvr) {
+
+/*        Compute left and/or right eigenvectors */
+/*        (Workspace: need 3*N, prefer N + 2*N*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	strevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], 
+		ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, &
+		ierr);
+    }
+
+/*     Compute condition numbers if desired */
+/*     (Workspace: need N*N+6*N unless SENSE = 'E') */
+
+    if (! wntsnn) {
+	strsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], 
+		ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, 
+		&work[iwrk], n, &iwork[1], &icond);
+    }
+
+    if (wantvl) {
+
+/*        Undo balancing of left eigenvectors */
+
+	sgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, 
+		&ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.f) {
+		scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.f) {
+		r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		scl = 1.f / slapy2_(&r__1, &r__2);
+		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    r__1 = vl[k + i__ * vl_dim1];
+/* Computing 2nd power */
+		    r__2 = vl[k + (i__ + 1) * vl_dim1];
+		    work[k] = r__1 * r__1 + r__2 * r__2;
+/* L10: */
+		}
+		k = isamax_(n, &work[1], &c__1);
+		slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], 
+			&cs, &sn, &r__);
+		srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * 
+			vl_dim1 + 1], &c__1, &cs, &sn);
+		vl[k + (i__ + 1) * vl_dim1] = 0.f;
+	    }
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*        Undo balancing of right eigenvectors */
+
+	sgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, 
+		&ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.f) {
+		scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.f) {
+		r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		scl = 1.f / slapy2_(&r__1, &r__2);
+		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    r__1 = vr[k + i__ * vr_dim1];
+/* Computing 2nd power */
+		    r__2 = vr[k + (i__ + 1) * vr_dim1];
+		    work[k] = r__1 * r__1 + r__2 * r__2;
+/* L30: */
+		}
+		k = isamax_(n, &work[1], &c__1);
+		slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], 
+			&cs, &sn, &r__);
+		srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * 
+			vr_dim1 + 1], &c__1, &cs, &sn);
+		vr[k + (i__ + 1) * vr_dim1] = 0.f;
+	    }
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 
+		1], &i__2, &ierr);
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 
+		1], &i__2, &ierr);
+	if (*info == 0) {
+	    if ((wntsnv || wntsnb) && icond == 0) {
+		slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[
+			1], n, &ierr);
+	    }
+	} else {
+	    i__1 = *ilo - 1;
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], 
+		    n, &ierr);
+	    i__1 = *ilo - 1;
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], 
+		    n, &ierr);
+	}
+    }
+
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGEEVX */
+
+} /* sgeevx_ */
+
diff --git a/lapack-netlib/SRC/sgehd2.c b/lapack-netlib/SRC/sgehd2.c
new file mode 100644
index 000000000..f5e8e9e51
--- /dev/null
+++ b/lapack-netlib/SRC/sgehd2.c
@@ -0,0 +1,627 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. 
+*/
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEHD2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgehd2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgehd2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgehd2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            IHI, ILO, INFO, LDA, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEHD2 reduces a real general matrix A to upper Hessenberg form H by */
+/* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* > */
+/* >          It is assumed that A is already upper triangular in rows */
+/* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
+/* >          set by a previous call to SGEBAL; otherwise they should be */
+/* >          set to 1 and N respectively. See Further Details. */
+/* >          1 <= ILO <= IHI <= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the n by n general matrix to be reduced. */
+/* >          On exit, the upper triangle and the first subdiagonal of A */
+/* >          are overwritten with the upper Hessenberg matrix H, and the */
+/* >          elements below the first subdiagonal, with the array TAU, */
+/* >          represent the orthogonal matrix Q as a product of elementary */
+/* >          reflectors. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (N-1) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
+/* >  reflectors */
+/* > */
+/* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
+/* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
+/* > */
+/* >  The contents of A are illustrated by the following example, with */
+/* >  n = 7, ilo = 2 and ihi = 6: */
+/* > */
+/* >  on entry,                        on exit, */
+/* > */
+/* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
+/* >  (                         a )    (                          a ) */
+/* > */
+/* >  where a denotes an element of the original matrix A, h denotes a */
+/* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
+/* >  element of the vector defining H(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, 
+	integer *lda, real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
+	    integer *, real *);
+    real aii;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
+	*info = -2;
+    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEHD2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    i__1 = *ihi - 1;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+
+/*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */
+
+	i__2 = *ihi - i__;
+/* Computing MIN */
+	i__3 = i__ + 2;
+	slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*n) + i__ * 
+		a_dim1], &c__1, &tau[i__]);
+	aii = a[i__ + 1 + i__ * a_dim1];
+	a[i__ + 1 + i__ * a_dim1] = 1.f;
+
+/*        Apply H(i) to A(1:ihi,i+1:ihi) from the right */
+
+	i__2 = *ihi - i__;
+	slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]);
+
+/*        Apply H(i) to A(i+1:ihi,i+1:n) from the left */
+
+	i__2 = *ihi - i__;
+	i__3 = *n - i__;
+	slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]);
+
+	a[i__ + 1 + i__ * a_dim1] = aii;
+/* L10: */
+    }
+
+    return 0;
+
+/*     End of SGEHD2 */
+
+} /* sgehd2_ */
+
diff --git a/lapack-netlib/SRC/sgehrd.c b/lapack-netlib/SRC/sgehrd.c
new file mode 100644
index 000000000..1237fe353
--- /dev/null
+++ b/lapack-netlib/SRC/sgehrd.c
@@ -0,0 +1,782 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+static integer c__65 = 65;
+static real c_b25 = -1.f;
+static real c_b26 = 1.f;
+
+/* > \brief \b SGEHRD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEHRD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgehrd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgehrd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgehrd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            IHI, ILO, INFO, LDA, LWORK, N */
+/*       REAL              A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEHRD reduces a real general matrix A to upper Hessenberg form H by */
+/* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* > */
+/* >          It is assumed that A is already upper triangular in rows */
+/* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
+/* >          set by a previous call to SGEBAL; otherwise they should be */
+/* >          set to 1 and N respectively. See Further Details. */
+/* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the N-by-N general matrix to be reduced. */
+/* >          On exit, the upper triangle and the first subdiagonal of A */
+/* >          are overwritten with the upper Hessenberg matrix H, and the */
+/* >          elements below the first subdiagonal, with the array TAU, */
+/* >          represent the orthogonal matrix Q as a product of elementary */
+/* >          reflectors. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (N-1) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
+/* >          zero. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (LWORK) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The length of the array WORK.  LWORK >= f2cmax(1,N). */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
+/* >  reflectors */
+/* > */
+/* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
+/* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
+/* > */
+/* >  The contents of A are illustrated by the following example, with */
+/* >  n = 7, ilo = 2 and ihi = 6: */
+/* > */
+/* >  on entry,                        on exit, */
+/* > */
+/* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
+/* >  (                         a )    (                          a ) */
+/* > */
+/* >  where a denotes an element of the original matrix A, h denotes a */
+/* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
+/* >  element of the vector defining H(i). */
+/* > */
+/* >  This file is a slight modification of LAPACK-3.0's DGEHRD */
+/* >  subroutine incorporating improvements proposed by Quintana-Orti and */
+/* >  Van de Geijn (2006). (See DLAHR2.) */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, 
+	integer *lda, real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, j, nbmin, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), strmm_(char *, char *, char *,
+	     char *, integer *, integer *, real *, real *, integer *, real *, 
+	    integer *), saxpy_(integer *, 
+	    real *, real *, integer *, real *, integer *), sgehd2_(integer *, 
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    ), slahr2_(integer *, integer *, integer *, real *, integer *, 
+	    real *, real *, integer *, real *, integer *);
+    integer ib;
+    real ei;
+    integer nb, nh, nx;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *,ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iwt;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
+	*info = -2;
+    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -8;
+    }
+
+    if (*info == 0) {
+
+/*       Compute the workspace requirements */
+
+/* Computing MIN */
+	i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nb = f2cmin(i__1,i__2);
+	lwkopt = *n * nb + 4160;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEHRD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	tau[i__] = 0.f;
+/* L10: */
+    }
+    i__1 = *n - 1;
+    for (i__ = f2cmax(1,*ihi); i__ <= i__1; ++i__) {
+	tau[i__] = 0.f;
+/* L20: */
+    }
+
+/*     Quick return if possible */
+
+    nh = *ihi - *ilo + 1;
+    if (nh <= 1) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+/*     Determine the block size */
+
+/* Computing MIN */
+    i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = f2cmin(i__1,i__2);
+    nbmin = 2;
+    if (nb > 1 && nb < nh) {
+
+/*        Determine when to cross over from blocked to unblocked code */
+/*        (last block is always handled by unblocked code) */
+
+/* Computing MAX */
+	i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < nh) {
+
+/*           Determine if workspace is large enough for blocked code */
+
+	    if (*lwork < *n * nb + 4160) {
+
+/*              Not enough workspace to use optimal NB:  determine the */
+/*              minimum value of NB, and reduce NB or force use of */
+/*              unblocked code */
+
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+		if (*lwork >= *n * nbmin + 4160) {
+		    nb = (*lwork - 4160) / *n;
+		} else {
+		    nb = 1;
+		}
+	    }
+	}
+    }
+    ldwork = *n;
+
+    if (nb < nbmin || nb >= nh) {
+
+/*        Use unblocked code below */
+
+	i__ = *ilo;
+
+    } else {
+
+/*        Use blocked code */
+
+	iwt = *n * nb + 1;
+	i__1 = *ihi - 1 - nx;
+	i__2 = nb;
+	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = *ihi - i__;
+	    ib = f2cmin(i__3,i__4);
+
+/*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
+/*           matrices V and T of the block reflector H = I - V*T*V**T */
+/*           which performs the reduction, and also the matrix Y = A*V*T */
+
+	    slahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], &
+		    work[iwt], &c__65, &work[1], &ldwork);
+
+/*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
+/*           right, computing  A := A - Y * V**T. V(i+ib,ib-1) must be set */
+/*           to 1 */
+
+	    ei = a[i__ + ib + (i__ + ib - 1) * a_dim1];
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f;
+	    i__3 = *ihi - i__ - ib + 1;
+	    sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, &
+		    work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
+		    c_b26, &a[(i__ + ib) * a_dim1 + 1], lda);
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;
+
+/*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
+/*           right */
+
+	    i__3 = ib - 1;
+	    strmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26,
+		     &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork);
+	    i__3 = ib - 2;
+	    for (j = 0; j <= i__3; ++j) {
+		saxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + 
+			j + 1) * a_dim1 + 1], &c__1);
+/* L30: */
+	    }
+
+/*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
+/*           left */
+
+	    i__3 = *ihi - i__;
+	    i__4 = *n - i__ - ib + 1;
+	    slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+		    i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], &
+		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
+		    ldwork);
+/* L40: */
+	}
+    }
+
+/*     Use unblocked code to reduce the rest of the matrix */
+
+    sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    work[1] = (real) lwkopt;
+
+    return 0;
+
+/*     End of SGEHRD */
+
+} /* sgehrd_ */
+
diff --git a/lapack-netlib/SRC/sgejsv.c b/lapack-netlib/SRC/sgejsv.c
new file mode 100644
index 000000000..416fdb4d5
--- /dev/null
+++ b/lapack-netlib/SRC/sgejsv.c
@@ -0,0 +1,2678 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) ceil(w)
+#define myhuge_(w) HUGE_VAL
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b34 = 0.f;
+static real c_b35 = 1.f;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief \b SGEJSV */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEJSV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgejsv.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgejsv.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgejsv.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, */
+/*                          M, N, A, LDA, SVA, U, LDU, V, LDV, */
+/*                          WORK, LWORK, IWORK, INFO ) */
+
+/*       IMPLICIT    NONE */
+/*       INTEGER     INFO, LDA, LDU, LDV, LWORK, M, N */
+/*       REAL        A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), */
+/*      $            WORK( LWORK ) */
+/*       INTEGER     IWORK( * ) */
+/*       CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEJSV computes the singular value decomposition (SVD) of a real M-by-N */
+/* > matrix [A], where M >= N. The SVD of [A] is written as */
+/* > */
+/* >              [A] = [U] * [SIGMA] * [V]^t, */
+/* > */
+/* > where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N */
+/* > diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and */
+/* > [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are */
+/* > the singular values of [A]. The columns of [U] and [V] are the left and */
+/* > the right singular vectors of [A], respectively. The matrices [U] and [V] */
+/* > are computed and stored in the arrays U and V, respectively. The diagonal */
+/* > of [SIGMA] is computed and stored in the array SVA. */
+/* > SGEJSV can sometimes compute tiny singular values and their singular vectors much */
+/* > more accurately than other SVD routines, see below under Further Details. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBA */
+/* > \verbatim */
+/* >          JOBA is CHARACTER*1 */
+/* >         Specifies the level of accuracy: */
+/* >       = 'C': This option works well (high relative accuracy) if A = B * D, */
+/* >              with well-conditioned B and arbitrary diagonal matrix D. */
+/* >              The accuracy cannot be spoiled by COLUMN scaling. The */
+/* >              accuracy of the computed output depends on the condition of */
+/* >              B, and the procedure aims at the best theoretical accuracy. */
+/* >              The relative error max_{i=1:N}|d sigma_i| / sigma_i is */
+/* >              bounded by f(M,N)*epsilon* cond(B), independent of D. */
+/* >              The input matrix is preprocessed with the QRF with column */
+/* >              pivoting. This initial preprocessing and preconditioning by */
+/* >              a rank revealing QR factorization is common for all values of */
+/* >              JOBA. Additional actions are specified as follows: */
+/* >       = 'E': Computation as with 'C' with an additional estimate of the */
+/* >              condition number of B. It provides a realistic error bound. */
+/* >       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings */
+/* >              D1, D2, and well-conditioned matrix C, this option gives */
+/* >              higher accuracy than the 'C' option. If the structure of the */
+/* >              input matrix is not known, and relative accuracy is */
+/* >              desirable, then this option is advisable. The input matrix A */
+/* >              is preprocessed with QR factorization with FULL (row and */
+/* >              column) pivoting. */
+/* >       = 'G': Computation as with 'F' with an additional estimate of the */
+/* >              condition number of B, where A=D*B. If A has heavily weighted */
+/* >              rows, then using this condition number gives too pessimistic */
+/* >              error bound. */
+/* >       = 'A': Small singular values are the noise and the matrix is treated */
+/* >              as numerically rank deficient. The error in the computed */
+/* >              singular values is bounded by f(m,n)*epsilon*||A||. */
+/* >              The computed SVD A = U * S * V^t restores A up to */
+/* >              f(m,n)*epsilon*||A||. */
+/* >              This gives the procedure the licence to discard (set to zero) */
+/* >              all singular values below N*epsilon*||A||. */
+/* >       = 'R': Similar as in 'A'. Rank revealing property of the initial */
+/* >              QR factorization is used do reveal (using triangular factor) */
+/* >              a gap sigma_{r+1} < epsilon * sigma_r in which case the */
+/* >              numerical RANK is declared to be r. The SVD is computed with */
+/* >              absolute error bounds, but more accurately than with 'A'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >         Specifies whether to compute the columns of U: */
+/* >       = 'U': N columns of U are returned in the array U. */
+/* >       = 'F': full set of M left sing. vectors is returned in the array U. */
+/* >       = 'W': U may be used as workspace of length M*N. See the description */
+/* >              of U. */
+/* >       = 'N': U is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >         Specifies whether to compute the matrix V: */
+/* >       = 'V': N columns of V are returned in the array V; Jacobi rotations */
+/* >              are not explicitly accumulated. */
+/* >       = 'J': N columns of V are returned in the array V, but they are */
+/* >              computed as the product of Jacobi rotations. This option is */
+/* >              allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. */
+/* >       = 'W': V may be used as workspace of length N*N. See the description */
+/* >              of V. */
+/* >       = 'N': V is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBR */
+/* > \verbatim */
+/* >          JOBR is CHARACTER*1 */
+/* >         Specifies the RANGE for the singular values. Issues the licence to */
+/* >         set to zero small positive singular values if they are outside */
+/* >         specified range. If A .NE. 0 is scaled so that the largest singular */
+/* >         value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues */
+/* >         the licence to kill columns of A whose norm in c*A is less than */
+/* >         SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN, */
+/* >         where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). */
+/* >       = 'N': Do not kill small columns of c*A. This option assumes that */
+/* >              BLAS and QR factorizations and triangular solvers are */
+/* >              implemented to work in that range. If the condition of A */
+/* >              is greater than BIG, use SGESVJ. */
+/* >       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] */
+/* >              (roughly, as described above). This option is recommended. */
+/* >                                             =========================== */
+/* >         For computing the singular values in the FULL range [SFMIN,BIG] */
+/* >         use SGESVJ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBT */
+/* > \verbatim */
+/* >          JOBT is CHARACTER*1 */
+/* >         If the matrix is square then the procedure may determine to use */
+/* >         transposed A if A^t seems to be better with respect to convergence. */
+/* >         If the matrix is not square, JOBT is ignored. This is subject to */
+/* >         changes in the future. */
+/* >         The decision is based on two values of entropy over the adjoint */
+/* >         orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). */
+/* >       = 'T': transpose if entropy test indicates possibly faster */
+/* >         convergence of Jacobi process if A^t is taken as input. If A is */
+/* >         replaced with A^t, then the row pivoting is included automatically. */
+/* >       = 'N': do not speculate. */
+/* >         This option can be used to compute only the singular values, or the */
+/* >         full SVD (U, SIGMA and V). For only one set of singular vectors */
+/* >         (U or V), the caller should provide both U and V, as one of the */
+/* >         matrices is used as workspace if the matrix A is transposed. */
+/* >         The implementer can easily remove this constraint and make the */
+/* >         code more complicated. See the descriptions of U and V. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBP */
+/* > \verbatim */
+/* >          JOBP is CHARACTER*1 */
+/* >         Issues the licence to introduce structured perturbations to drown */
+/* >         denormalized numbers. This licence should be active if the */
+/* >         denormals are poorly implemented, causing slow computation, */
+/* >         especially in cases of fast convergence (!). For details see [1,2]. */
+/* >         For the sake of simplicity, this perturbations are included only */
+/* >         when the full SVD or only the singular values are requested. The */
+/* >         implementer/user can easily add the perturbation for the cases of */
+/* >         computing one set of singular vectors. */
+/* >       = 'P': introduce perturbation */
+/* >       = 'N': do not perturb */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >         The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >         The number of columns of the input matrix A. M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SVA */
+/* > \verbatim */
+/* >          SVA is REAL array, dimension (N) */
+/* >          On exit, */
+/* >          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the */
+/* >            computation SVA contains Euclidean column norms of the */
+/* >            iterated matrices in the array A. */
+/* >          - For WORK(1) .NE. WORK(2): The singular values of A are */
+/* >            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if */
+/* >            sigma_max(A) overflows or if small singular values have been */
+/* >            saved from underflow by scaling the input matrix A. */
+/* >          - If JOBR='R' then some of the singular values may be returned */
+/* >            as exact zeros obtained by "set to zero" because they are */
+/* >            below the numerical rank threshold or are denormalized numbers. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is REAL array, dimension ( LDU, N ) */
+/* >          If JOBU = 'U', then U contains on exit the M-by-N matrix of */
+/* >                         the left singular vectors. */
+/* >          If JOBU = 'F', then U contains on exit the M-by-M matrix of */
+/* >                         the left singular vectors, including an ONB */
+/* >                         of the orthogonal complement of the Range(A). */
+/* >          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N), */
+/* >                         then U is used as workspace if the procedure */
+/* >                         replaces A with A^t. In that case, [V] is computed */
+/* >                         in U as left singular vectors of A^t and then */
+/* >                         copied back to the V array. This 'W' option is just */
+/* >                         a reminder to the caller that in this case U is */
+/* >                         reserved as workspace of length N*N. */
+/* >          If JOBU = 'N'  U is not referenced, unless JOBT='T'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U,  LDU >= 1. */
+/* >          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension ( LDV, N ) */
+/* >          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of */
+/* >                         the right singular vectors; */
+/* >          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N), */
+/* >                         then V is used as workspace if the pprocedure */
+/* >                         replaces A with A^t. In that case, [U] is computed */
+/* >                         in V as right singular vectors of A^t and then */
+/* >                         copied back to the U array. This 'W' option is just */
+/* >                         a reminder to the caller that in this case V is */
+/* >                         reserved as workspace of length N*N. */
+/* >          If JOBV = 'N'  V is not referenced, unless JOBT='T'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V,  LDV >= 1. */
+/* >          If JOBV = 'V' or 'J' or 'W', then LDV >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (LWORK) */
+/* >          On exit, */
+/* >          WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such */
+/* >                    that SCALE*SVA(1:N) are the computed singular values */
+/* >                    of A. (See the description of SVA().) */
+/* >          WORK(2) = See the description of WORK(1). */
+/* >          WORK(3) = SCONDA is an estimate for the condition number of */
+/* >                    column equilibrated A. (If JOBA = 'E' or 'G') */
+/* >                    SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1). */
+/* >                    It is computed using SPOCON. It holds */
+/* >                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */
+/* >                    where R is the triangular factor from the QRF of A. */
+/* >                    However, if R is truncated and the numerical rank is */
+/* >                    determined to be strictly smaller than N, SCONDA is */
+/* >                    returned as -1, thus indicating that the smallest */
+/* >                    singular values might be lost. */
+/* > */
+/* >          If full SVD is needed, the following two condition numbers are */
+/* >          useful for the analysis of the algorithm. They are provied for */
+/* >          a developer/implementer who is familiar with the details of */
+/* >          the method. */
+/* > */
+/* >          WORK(4) = an estimate of the scaled condition number of the */
+/* >                    triangular factor in the first QR factorization. */
+/* >          WORK(5) = an estimate of the scaled condition number of the */
+/* >                    triangular factor in the second QR factorization. */
+/* >          The following two parameters are computed if JOBT = 'T'. */
+/* >          They are provided for a developer/implementer who is familiar */
+/* >          with the details of the method. */
+/* > */
+/* >          WORK(6) = the entropy of A^t*A :: this is the Shannon entropy */
+/* >                    of diag(A^t*A) / Trace(A^t*A) taken as point in the */
+/* >                    probability simplex. */
+/* >          WORK(7) = the entropy of A*A^t. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          Length of WORK to confirm proper allocation of work space. */
+/* >          LWORK depends on the job: */
+/* > */
+/* >          If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and */
+/* >            -> .. no scaled condition estimate required (JOBE = 'N'): */
+/* >               LWORK >= f2cmax(2*M+N,4*N+1,7). This is the minimal requirement. */
+/* >               ->> For optimal performance (blocked code) the optimal value */
+/* >               is LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal */
+/* >               block size for DGEQP3 and DGEQRF. */
+/* >               In general, optimal LWORK is computed as */
+/* >               LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). */
+/* >            -> .. an estimate of the scaled condition number of A is */
+/* >               required (JOBA='E', 'G'). In this case, LWORK is the maximum */
+/* >               of the above and N*N+4*N, i.e. LWORK >= f2cmax(2*M+N,N*N+4*N,7). */
+/* >               ->> For optimal performance (blocked code) the optimal value */
+/* >               is LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). */
+/* >               In general, the optimal length LWORK is computed as */
+/* >               LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), */
+/* >                                                     N+N*N+LWORK(DPOCON),7). */
+/* > */
+/* >          If SIGMA and the right singular vectors are needed (JOBV = 'V'), */
+/* >            -> the minimal requirement is LWORK >= f2cmax(2*M+N,4*N+1,7). */
+/* >            -> For optimal performance, LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7), */
+/* >               where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ, */
+/* >               DORMLQ. In general, the optimal length LWORK is computed as */
+/* >               LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), */
+/* >                       N+LWORK(DGELQ), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). */
+/* > */
+/* >          If SIGMA and the left singular vectors are needed */
+/* >            -> the minimal requirement is LWORK >= f2cmax(2*M+N,4*N+1,7). */
+/* >            -> For optimal performance: */
+/* >               if JOBU = 'U' :: LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7), */
+/* >               if JOBU = 'F' :: LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,N+M*NB,7), */
+/* >               where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. */
+/* >               In general, the optimal length LWORK is computed as */
+/* >               LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), */
+/* >                        2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). */
+/* >               Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or */
+/* >               M*NB (for JOBU = 'F'). */
+/* > */
+/* >          If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and */
+/* >            -> if JOBV = 'V' */
+/* >               the minimal requirement is LWORK >= f2cmax(2*M+N,6*N+2*N*N). */
+/* >            -> if JOBV = 'J' the minimal requirement is */
+/* >               LWORK >= f2cmax(2*M+N, 4*N+N*N,2*N+N*N+6). */
+/* >            -> For optimal performance, LWORK should be additionally */
+/* >               larger than N+M*NB, where NB is the optimal block size */
+/* >               for DORMQR. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (M+3*N). */
+/* >          On exit, */
+/* >          IWORK(1) = the numerical rank determined after the initial */
+/* >                     QR factorization with pivoting. See the descriptions */
+/* >                     of JOBA and JOBR. */
+/* >          IWORK(2) = the number of the computed nonzero singular values */
+/* >          IWORK(3) = if nonzero, a warning message: */
+/* >                     If IWORK(3) = 1 then some of the column norms of A */
+/* >                     were denormalized floats. The requested high accuracy */
+/* >                     is not warranted by the data. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >           < 0:  if INFO = -i, then the i-th argument had an illegal value. */
+/* >           = 0:  successful exit; */
+/* >           > 0:  SGEJSV  did not converge in the maximal allowed number */
+/* >                 of sweeps. The computed values may be inaccurate. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEsing */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  SGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3, */
+/* >  SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an */
+/* >  additional row pivoting can be used as a preprocessor, which in some */
+/* >  cases results in much higher accuracy. An example is matrix A with the */
+/* >  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned */
+/* >  diagonal matrices and C is well-conditioned matrix. In that case, complete */
+/* >  pivoting in the first QR factorizations provides accuracy dependent on the */
+/* >  condition number of C, and independent of D1, D2. Such higher accuracy is */
+/* >  not completely understood theoretically, but it works well in practice. */
+/* >  Further, if A can be written as A = B*D, with well-conditioned B and some */
+/* >  diagonal D, then the high accuracy is guaranteed, both theoretically and */
+/* >  in software, independent of D. For more details see [1], [2]. */
+/* >     The computational range for the singular values can be the full range */
+/* >  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS */
+/* >  & LAPACK routines called by SGEJSV are implemented to work in that range. */
+/* >  If that is not the case, then the restriction for safe computation with */
+/* >  the singular values in the range of normalized IEEE numbers is that the */
+/* >  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not */
+/* >  overflow. This code (SGEJSV) is best used in this restricted range, */
+/* >  meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are */
+/* >  returned as zeros. See JOBR for details on this. */
+/* >     Further, this implementation is somewhat slower than the one described */
+/* >  in [1,2] due to replacement of some non-LAPACK components, and because */
+/* >  the choice of some tuning parameters in the iterative part (SGESVJ) is */
+/* >  left to the implementer on a particular machine. */
+/* >     The rank revealing QR factorization (in this code: SGEQP3) should be */
+/* >  implemented as in [3]. We have a new version of SGEQP3 under development */
+/* >  that is more robust than the current one in LAPACK, with a cleaner cut in */
+/* >  rank deficient cases. It will be available in the SIGMA library [4]. */
+/* >  If M is much larger than N, it is obvious that the initial QRF with */
+/* >  column pivoting can be preprocessed by the QRF without pivoting. That */
+/* >  well known trick is not used in SGEJSV because in some cases heavy row */
+/* >  weighting can be treated with complete pivoting. The overhead in cases */
+/* >  M much larger than N is then only due to pivoting, but the benefits in */
+/* >  terms of accuracy have prevailed. The implementer/user can incorporate */
+/* >  this extra QRF step easily. The implementer can also improve data movement */
+/* >  (matrix transpose, matrix copy, matrix transposed copy) - this */
+/* >  implementation of SGEJSV uses only the simplest, naive data movement. */
+/* > \endverbatim */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */
+
+/* > \par References: */
+/*  ================ */
+/* > */
+/* > \verbatim */
+/* > */
+/* > [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */
+/* >     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */
+/* >     LAPACK Working note 169. */
+/* > [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */
+/* >     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */
+/* >     LAPACK Working note 170. */
+/* > [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR */
+/* >     factorization software - a case study. */
+/* >     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. */
+/* >     LAPACK Working note 176. */
+/* > [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */
+/* >     QSVD, (H,K)-SVD computations. */
+/* >     Department of Mathematics, University of Zagreb, 2008. */
+/* > \endverbatim */
+
+/* >  \par Bugs, examples and comments: */
+/*   ================================= */
+/* > */
+/* >  Please report all bugs and send interesting examples and/or comments to */
+/* >  drmac@math.hr. Thank you. */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgejsv_(char *joba, char *jobu, char *jobv, char *jobr, 
+	char *jobt, char *jobp, integer *m, integer *n, real *a, integer *lda,
+	 real *sva, real *u, integer *ldu, real *v, integer *ldv, real *work, 
+	integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, 
+	    i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11, i__12;
+    real r__1, r__2, r__3, r__4;
+
+    /* Local variables */
+    logical defr;
+    real aapp, aaqq;
+    logical kill;
+    integer ierr;
+    real temp1;
+    extern real snrm2_(integer *, real *, integer *);
+    integer p, q;
+    logical jracc;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    real small, entra, sfmin;
+    logical lsvec;
+    real epsln;
+    logical rsvec;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    integer n1;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *);
+    logical l2aber;
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    );
+    real condr1, condr2, uscal1, uscal2;
+    logical l2kill, l2rank, l2tran;
+    extern /* Subroutine */ int sgeqp3_(integer *, integer *, real *, integer 
+	    *, integer *, real *, real *, integer *, integer *);
+    logical l2pert;
+    integer nr;
+    real scalem, sconda;
+    logical goscal;
+    real aatmin;
+    extern real slamch_(char *);
+    real aatmax;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical noscal;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *), sgeqrf_(integer *, integer *, real *, integer *, real *, 
+	    real *, integer *, integer *), slacpy_(char *, integer *, integer 
+	    *, real *, integer *, real *, integer *), slaset_(char *, 
+	    integer *, integer *, real *, real *, real *, integer *);
+    real entrat;
+    logical almort;
+    real maxprj;
+    extern /* Subroutine */ int spocon_(char *, integer *, real *, integer *, 
+	    real *, real *, real *, integer *, integer *);
+    logical errest;
+    extern /* Subroutine */ int sgesvj_(char *, char *, char *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , real *, integer *, integer *), slassq_(
+	    integer *, real *, integer *, real *, real *);
+    logical transp;
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
+	    *, integer *, integer *, integer *), sorgqr_(integer *, integer *,
+	     integer *, real *, integer *, real *, real *, integer *, integer 
+	    *), sormlq_(char *, char *, integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, real *, integer *, 
+	    integer *), sormqr_(char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , real *, integer *, integer *);
+    logical rowpiv;
+    real big, cond_ok__, xsc, big1;
+    integer warning, numrank;
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  =========================================================================== */
+
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    --sva;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    lsvec = lsame_(jobu, "U") || lsame_(jobu, "F");
+    jracc = lsame_(jobv, "J");
+    rsvec = lsame_(jobv, "V") || jracc;
+    rowpiv = lsame_(joba, "F") || lsame_(joba, "G");
+    l2rank = lsame_(joba, "R");
+    l2aber = lsame_(joba, "A");
+    errest = lsame_(joba, "E") || lsame_(joba, "G");
+    l2tran = lsame_(jobt, "T");
+    l2kill = lsame_(jobr, "R");
+    defr = lsame_(jobr, "N");
+    l2pert = lsame_(jobp, "P");
+
+    if (! (rowpiv || l2rank || l2aber || errest || lsame_(joba, "C"))) {
+	*info = -1;
+    } else if (! (lsvec || lsame_(jobu, "N") || lsame_(
+	    jobu, "W"))) {
+	*info = -2;
+    } else if (! (rsvec || lsame_(jobv, "N") || lsame_(
+	    jobv, "W")) || jracc && ! lsvec) {
+	*info = -3;
+    } else if (! (l2kill || defr)) {
+	*info = -4;
+    } else if (! (l2tran || lsame_(jobt, "N"))) {
+	*info = -5;
+    } else if (! (l2pert || lsame_(jobp, "N"))) {
+	*info = -6;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*n < 0 || *n > *m) {
+	*info = -8;
+    } else if (*lda < *m) {
+	*info = -10;
+    } else if (lsvec && *ldu < *m) {
+	*info = -13;
+    } else if (rsvec && *ldv < *n) {
+	*info = -15;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 7, i__2 = (*n << 2) + 1, i__1 = f2cmax(i__1,i__2), i__2 = (*m << 
+		1) + *n;
+/* Computing MAX */
+	i__3 = 7, i__4 = (*n << 2) + *n * *n, i__3 = f2cmax(i__3,i__4), i__4 = (*
+		m << 1) + *n;
+/* Computing MAX */
+	i__5 = 7, i__6 = (*m << 1) + *n, i__5 = f2cmax(i__5,i__6), i__6 = (*n << 
+		2) + 1;
+/* Computing MAX */
+	i__7 = 7, i__8 = (*m << 1) + *n, i__7 = f2cmax(i__7,i__8), i__8 = (*n << 
+		2) + 1;
+/* Computing MAX */
+	i__9 = (*m << 1) + *n, i__10 = *n * 6 + (*n << 1) * *n;
+/* Computing MAX */
+	i__11 = (*m << 1) + *n, i__12 = (*n << 2) + *n * *n, i__11 = f2cmax(
+		i__11,i__12), i__12 = (*n << 1) + *n * *n + 6;
+	if (! (lsvec || rsvec || errest) && *lwork < f2cmax(i__1,i__2) || ! (
+		lsvec || rsvec) && errest && *lwork < f2cmax(i__3,i__4) || lsvec 
+		&& ! rsvec && *lwork < f2cmax(i__5,i__6) || rsvec && ! lsvec && *
+		lwork < f2cmax(i__7,i__8) || lsvec && rsvec && ! jracc && *lwork 
+		< f2cmax(i__9,i__10) || lsvec && rsvec && jracc && *lwork < f2cmax(
+		i__11,i__12)) {
+	    *info = -17;
+	} else {
+/*        #:) */
+	    *info = 0;
+	}
+    }
+
+    if (*info != 0) {
+/*       #:( */
+	i__1 = -(*info);
+	xerbla_("SGEJSV", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return for void matrix (Y3K safe) */
+/* #:) */
+    if (*m == 0 || *n == 0) {
+	iwork[1] = 0;
+	iwork[2] = 0;
+	iwork[3] = 0;
+	work[1] = 0.f;
+	work[2] = 0.f;
+	work[3] = 0.f;
+	work[4] = 0.f;
+	work[5] = 0.f;
+	work[6] = 0.f;
+	work[7] = 0.f;
+	return 0;
+    }
+
+/*     Determine whether the matrix U should be M x N or M x M */
+
+    if (lsvec) {
+	n1 = *n;
+	if (lsame_(jobu, "F")) {
+	    n1 = *m;
+	}
+    }
+
+/*     Set numerical parameters */
+
+/* !    NOTE: Make sure SLAMCH() does not fail on the target architecture. */
+
+    epsln = slamch_("Epsilon");
+    sfmin = slamch_("SafeMinimum");
+    small = sfmin / epsln;
+    big = slamch_("O");
+/*     BIG   = ONE / SFMIN */
+
+/*     Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N */
+
+/* (!)  If necessary, scale SVA() to protect the largest norm from */
+/*     overflow. It is possible that this scaling pushes the smallest */
+/*     column norm left from the underflow threshold (extreme case). */
+
+    scalem = 1.f / sqrt((real) (*m) * (real) (*n));
+    noscal = TRUE_;
+    goscal = TRUE_;
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	aapp = 0.f;
+	aaqq = 1.f;
+	slassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
+	if (aapp > big) {
+	    *info = -9;
+	    i__2 = -(*info);
+	    xerbla_("SGEJSV", &i__2, (ftnlen)6);
+	    return 0;
+	}
+	aaqq = sqrt(aaqq);
+	if (aapp < big / aaqq && noscal) {
+	    sva[p] = aapp * aaqq;
+	} else {
+	    noscal = FALSE_;
+	    sva[p] = aapp * (aaqq * scalem);
+	    if (goscal) {
+		goscal = FALSE_;
+		i__2 = p - 1;
+		sscal_(&i__2, &scalem, &sva[1], &c__1);
+	    }
+	}
+/* L1874: */
+    }
+
+    if (noscal) {
+	scalem = 1.f;
+    }
+
+    aapp = 0.f;
+    aaqq = big;
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+/* Computing MAX */
+	r__1 = aapp, r__2 = sva[p];
+	aapp = f2cmax(r__1,r__2);
+	if (sva[p] != 0.f) {
+/* Computing MIN */
+	    r__1 = aaqq, r__2 = sva[p];
+	    aaqq = f2cmin(r__1,r__2);
+	}
+/* L4781: */
+    }
+
+/*     Quick return for zero M x N matrix */
+/* #:) */
+    if (aapp == 0.f) {
+	if (lsvec) {
+	    slaset_("G", m, &n1, &c_b34, &c_b35, &u[u_offset], ldu)
+		    ;
+	}
+	if (rsvec) {
+	    slaset_("G", n, n, &c_b34, &c_b35, &v[v_offset], ldv);
+	}
+	work[1] = 1.f;
+	work[2] = 1.f;
+	if (errest) {
+	    work[3] = 1.f;
+	}
+	if (lsvec && rsvec) {
+	    work[4] = 1.f;
+	    work[5] = 1.f;
+	}
+	if (l2tran) {
+	    work[6] = 0.f;
+	    work[7] = 0.f;
+	}
+	iwork[1] = 0;
+	iwork[2] = 0;
+	iwork[3] = 0;
+	return 0;
+    }
+
+/*     Issue warning if denormalized column norms detected. Override the */
+/*     high relative accuracy request. Issue licence to kill columns */
+/*     (set them to zero) whose norm is less than sigma_max / BIG (roughly). */
+/* #:( */
+    warning = 0;
+    if (aaqq <= sfmin) {
+	l2rank = TRUE_;
+	l2kill = TRUE_;
+	warning = 1;
+    }
+
+/*     Quick return for one-column matrix */
+/* #:) */
+    if (*n == 1) {
+
+	if (lsvec) {
+	    slascl_("G", &c__0, &c__0, &sva[1], &scalem, m, &c__1, &a[a_dim1 
+		    + 1], lda, &ierr);
+	    slacpy_("A", m, &c__1, &a[a_offset], lda, &u[u_offset], ldu);
+/*           computing all M left singular vectors of the M x 1 matrix */
+	    if (n1 != *n) {
+		i__1 = *lwork - *n;
+		sgeqrf_(m, n, &u[u_offset], ldu, &work[1], &work[*n + 1], &
+			i__1, &ierr);
+		i__1 = *lwork - *n;
+		sorgqr_(m, &n1, &c__1, &u[u_offset], ldu, &work[1], &work[*n 
+			+ 1], &i__1, &ierr);
+		scopy_(m, &a[a_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
+	    }
+	}
+	if (rsvec) {
+	    v[v_dim1 + 1] = 1.f;
+	}
+	if (sva[1] < big * scalem) {
+	    sva[1] /= scalem;
+	    scalem = 1.f;
+	}
+	work[1] = 1.f / scalem;
+	work[2] = 1.f;
+	if (sva[1] != 0.f) {
+	    iwork[1] = 1;
+	    if (sva[1] / scalem >= sfmin) {
+		iwork[2] = 1;
+	    } else {
+		iwork[2] = 0;
+	    }
+	} else {
+	    iwork[1] = 0;
+	    iwork[2] = 0;
+	}
+	iwork[3] = 0;
+	if (errest) {
+	    work[3] = 1.f;
+	}
+	if (lsvec && rsvec) {
+	    work[4] = 1.f;
+	    work[5] = 1.f;
+	}
+	if (l2tran) {
+	    work[6] = 0.f;
+	    work[7] = 0.f;
+	}
+	return 0;
+
+    }
+
+    transp = FALSE_;
+    l2tran = l2tran && *m == *n;
+
+    aatmax = -1.f;
+    aatmin = big;
+    if (rowpiv || l2tran) {
+
+/*     Compute the row norms, needed to determine row pivoting sequence */
+/*     (in the case of heavily row weighted A, row pivoting is strongly */
+/*     advised) and to collect information needed to compare the */
+/*     structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). */
+
+	if (l2tran) {
+	    i__1 = *m;
+	    for (p = 1; p <= i__1; ++p) {
+		xsc = 0.f;
+		temp1 = 1.f;
+		slassq_(n, &a[p + a_dim1], lda, &xsc, &temp1);
+/*              SLASSQ gets both the ell_2 and the ell_infinity norm */
+/*              in one pass through the vector */
+		work[*m + *n + p] = xsc * scalem;
+		work[*n + p] = xsc * (scalem * sqrt(temp1));
+/* Computing MAX */
+		r__1 = aatmax, r__2 = work[*n + p];
+		aatmax = f2cmax(r__1,r__2);
+		if (work[*n + p] != 0.f) {
+/* Computing MIN */
+		    r__1 = aatmin, r__2 = work[*n + p];
+		    aatmin = f2cmin(r__1,r__2);
+		}
+/* L1950: */
+	    }
+	} else {
+	    i__1 = *m;
+	    for (p = 1; p <= i__1; ++p) {
+		work[*m + *n + p] = scalem * (r__1 = a[p + isamax_(n, &a[p + 
+			a_dim1], lda) * a_dim1], abs(r__1));
+/* Computing MAX */
+		r__1 = aatmax, r__2 = work[*m + *n + p];
+		aatmax = f2cmax(r__1,r__2);
+/* Computing MIN */
+		r__1 = aatmin, r__2 = work[*m + *n + p];
+		aatmin = f2cmin(r__1,r__2);
+/* L1904: */
+	    }
+	}
+
+    }
+
+/*     For square matrix A try to determine whether A^t  would be  better */
+/*     input for the preconditioned Jacobi SVD, with faster convergence. */
+/*     The decision is based on an O(N) function of the vector of column */
+/*     and row norms of A, based on the Shannon entropy. This should give */
+/*     the right choice in most cases when the difference actually matters. */
+/*     It may fail and pick the slower converging side. */
+
+    entra = 0.f;
+    entrat = 0.f;
+    if (l2tran) {
+
+	xsc = 0.f;
+	temp1 = 1.f;
+	slassq_(n, &sva[1], &c__1, &xsc, &temp1);
+	temp1 = 1.f / temp1;
+
+	entra = 0.f;
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+/* Computing 2nd power */
+	    r__1 = sva[p] / xsc;
+	    big1 = r__1 * r__1 * temp1;
+	    if (big1 != 0.f) {
+		entra += big1 * log(big1);
+	    }
+/* L1113: */
+	}
+	entra = -entra / log((real) (*n));
+
+/*        Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. */
+/*        It is derived from the diagonal of  A^t * A.  Do the same with the */
+/*        diagonal of A * A^t, compute the entropy of the corresponding */
+/*        probability distribution. Note that A * A^t and A^t * A have the */
+/*        same trace. */
+
+	entrat = 0.f;
+	i__1 = *n + *m;
+	for (p = *n + 1; p <= i__1; ++p) {
+/* Computing 2nd power */
+	    r__1 = work[p] / xsc;
+	    big1 = r__1 * r__1 * temp1;
+	    if (big1 != 0.f) {
+		entrat += big1 * log(big1);
+	    }
+/* L1114: */
+	}
+	entrat = -entrat / log((real) (*m));
+
+/*        Analyze the entropies and decide A or A^t. Smaller entropy */
+/*        usually means better input for the algorithm. */
+
+	transp = entrat < entra;
+
+/*        If A^t is better than A, transpose A. */
+
+	if (transp) {
+/*           In an optimal implementation, this trivial transpose */
+/*           should be replaced with faster transpose. */
+	    i__1 = *n - 1;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p + 1; q <= i__2; ++q) {
+		    temp1 = a[q + p * a_dim1];
+		    a[q + p * a_dim1] = a[p + q * a_dim1];
+		    a[p + q * a_dim1] = temp1;
+/* L1116: */
+		}
+/* L1115: */
+	    }
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		work[*m + *n + p] = sva[p];
+		sva[p] = work[*n + p];
+/* L1117: */
+	    }
+	    temp1 = aapp;
+	    aapp = aatmax;
+	    aatmax = temp1;
+	    temp1 = aaqq;
+	    aaqq = aatmin;
+	    aatmin = temp1;
+	    kill = lsvec;
+	    lsvec = rsvec;
+	    rsvec = kill;
+	    if (lsvec) {
+		n1 = *n;
+	    }
+
+	    rowpiv = TRUE_;
+	}
+
+    }
+/*     END IF L2TRAN */
+
+/*     Scale the matrix so that its maximal singular value remains less */
+/*     than SQRT(BIG) -- the matrix is scaled so that its maximal column */
+/*     has Euclidean norm equal to SQRT(BIG/N). The only reason to keep */
+/*     SQRT(BIG) instead of BIG is the fact that SGEJSV uses LAPACK and */
+/*     BLAS routines that, in some implementations, are not capable of */
+/*     working in the full interval [SFMIN,BIG] and that they may provoke */
+/*     overflows in the intermediate results. If the singular values spread */
+/*     from SFMIN to BIG, then SGESVJ will compute them. So, in that case, */
+/*     one should use SGESVJ instead of SGEJSV. */
+
+    big1 = sqrt(big);
+    temp1 = sqrt(big / (real) (*n));
+
+    slascl_("G", &c__0, &c__0, &aapp, &temp1, n, &c__1, &sva[1], n, &ierr);
+    if (aaqq > aapp * sfmin) {
+	aaqq = aaqq / aapp * temp1;
+    } else {
+	aaqq = aaqq * temp1 / aapp;
+    }
+    temp1 *= scalem;
+    slascl_("G", &c__0, &c__0, &aapp, &temp1, m, n, &a[a_offset], lda, &ierr);
+
+/*     To undo scaling at the end of this procedure, multiply the */
+/*     computed singular values with USCAL2 / USCAL1. */
+
+    uscal1 = temp1;
+    uscal2 = aapp;
+
+    if (l2kill) {
+/*        L2KILL enforces computation of nonzero singular values in */
+/*        the restricted range of condition number of the initial A, */
+/*        sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). */
+	xsc = sqrt(sfmin);
+    } else {
+	xsc = small;
+
+/*        Now, if the condition number of A is too big, */
+/*        sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, */
+/*        as a precaution measure, the full SVD is computed using SGESVJ */
+/*        with accumulated Jacobi rotations. This provides numerically */
+/*        more robust computation, at the cost of slightly increased run */
+/*        time. Depending on the concrete implementation of BLAS and LAPACK */
+/*        (i.e. how they behave in presence of extreme ill-conditioning) the */
+/*        implementor may decide to remove this switch. */
+	if (aaqq < sqrt(sfmin) && lsvec && rsvec) {
+	    jracc = TRUE_;
+	}
+
+    }
+    if (aaqq < xsc) {
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    if (sva[p] < xsc) {
+		slaset_("A", m, &c__1, &c_b34, &c_b34, &a[p * a_dim1 + 1], 
+			lda);
+		sva[p] = 0.f;
+	    }
+/* L700: */
+	}
+    }
+
+/*     Preconditioning using QR factorization with pivoting */
+
+    if (rowpiv) {
+/*        Optional row permutation (Bjoerck row pivoting): */
+/*        A result by Cox and Higham shows that the Bjoerck's */
+/*        row pivoting combined with standard column pivoting */
+/*        has similar effect as Powell-Reid complete pivoting. */
+/*        The ell-infinity norms of A are made nonincreasing. */
+	i__1 = *m - 1;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *m - p + 1;
+	    q = isamax_(&i__2, &work[*m + *n + p], &c__1) + p - 1;
+	    iwork[(*n << 1) + p] = q;
+	    if (p != q) {
+		temp1 = work[*m + *n + p];
+		work[*m + *n + p] = work[*m + *n + q];
+		work[*m + *n + q] = temp1;
+	    }
+/* L1952: */
+	}
+	i__1 = *m - 1;
+	slaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[(*n << 1) + 1], &
+		c__1);
+    }
+
+/*     End of the preparation phase (scaling, optional sorting and */
+/*     transposing, optional flushing of small columns). */
+
+/*     Preconditioning */
+
+/*     If the full SVD is needed, the right singular vectors are computed */
+/*     from a matrix equation, and for that we need theoretical analysis */
+/*     of the Businger-Golub pivoting. So we use SGEQP3 as the first RR QRF. */
+/*     In all other cases the first RR QRF can be chosen by other criteria */
+/*     (eg speed by replacing global with restricted window pivoting, such */
+/*     as in SGEQPX from TOMS # 782). Good results will be obtained using */
+/*     SGEQPX with properly (!) chosen numerical parameters. */
+/*     Any improvement of SGEQP3 improves overal performance of SGEJSV. */
+
+/*     A * P1 = Q1 * [ R1^t 0]^t: */
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	iwork[p] = 0;
+/* L1963: */
+    }
+    i__1 = *lwork - *n;
+    sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], &
+	    i__1, &ierr);
+
+/*     The upper triangular matrix R1 from the first QRF is inspected for */
+/*     rank deficiency and possibilities for deflation, or possible */
+/*     ill-conditioning. Depending on the user specified flag L2RANK, */
+/*     the procedure explores possibilities to reduce the numerical */
+/*     rank by inspecting the computed upper triangular factor. If */
+/*     L2RANK or L2ABER are up, then SGEJSV will compute the SVD of */
+/*     A + dA, where ||dA|| <= f(M,N)*EPSLN. */
+
+    nr = 1;
+    if (l2aber) {
+/*        Standard absolute error bound suffices. All sigma_i with */
+/*        sigma_i < N*EPSLN*||A|| are flushed to zero. This is an */
+/*        aggressive enforcement of lower numerical rank by introducing a */
+/*        backward error of the order of N*EPSLN*||A||. */
+	temp1 = sqrt((real) (*n)) * epsln;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((r__2 = a[p + p * a_dim1], abs(r__2)) >= temp1 * (r__1 = a[
+		    a_dim1 + 1], abs(r__1))) {
+		++nr;
+	    } else {
+		goto L3002;
+	    }
+/* L3001: */
+	}
+L3002:
+	;
+    } else if (l2rank) {
+/*        Sudden drop on the diagonal of R1 is used as the criterion for */
+/*        close-to-rank-deficient. */
+	temp1 = sqrt(sfmin);
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((r__2 = a[p + p * a_dim1], abs(r__2)) < epsln * (r__1 = a[p - 
+		    1 + (p - 1) * a_dim1], abs(r__1)) || (r__3 = a[p + p * 
+		    a_dim1], abs(r__3)) < small || l2kill && (r__4 = a[p + p *
+		     a_dim1], abs(r__4)) < temp1) {
+		goto L3402;
+	    }
+	    ++nr;
+/* L3401: */
+	}
+L3402:
+
+	;
+    } else {
+/*        The goal is high relative accuracy. However, if the matrix */
+/*        has high scaled condition number the relative accuracy is in */
+/*        general not feasible. Later on, a condition number estimator */
+/*        will be deployed to estimate the scaled condition number. */
+/*        Here we just remove the underflowed part of the triangular */
+/*        factor. This prevents the situation in which the code is */
+/*        working hard to get the accuracy not warranted by the data. */
+	temp1 = sqrt(sfmin);
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((r__1 = a[p + p * a_dim1], abs(r__1)) < small || l2kill && (
+		    r__2 = a[p + p * a_dim1], abs(r__2)) < temp1) {
+		goto L3302;
+	    }
+	    ++nr;
+/* L3301: */
+	}
+L3302:
+
+	;
+    }
+
+    almort = FALSE_;
+    if (nr == *n) {
+	maxprj = 1.f;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    temp1 = (r__1 = a[p + p * a_dim1], abs(r__1)) / sva[iwork[p]];
+	    maxprj = f2cmin(maxprj,temp1);
+/* L3051: */
+	}
+/* Computing 2nd power */
+	r__1 = maxprj;
+	if (r__1 * r__1 >= 1.f - (real) (*n) * epsln) {
+	    almort = TRUE_;
+	}
+    }
+
+
+    sconda = -1.f;
+    condr1 = -1.f;
+    condr2 = -1.f;
+
+    if (errest) {
+	if (*n == nr) {
+	    if (rsvec) {
+		slacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    temp1 = sva[iwork[p]];
+		    r__1 = 1.f / temp1;
+		    sscal_(&p, &r__1, &v[p * v_dim1 + 1], &c__1);
+/* L3053: */
+		}
+		spocon_("U", n, &v[v_offset], ldv, &c_b35, &temp1, &work[*n + 
+			1], &iwork[(*n << 1) + *m + 1], &ierr);
+	    } else if (lsvec) {
+		slacpy_("U", n, n, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    temp1 = sva[iwork[p]];
+		    r__1 = 1.f / temp1;
+		    sscal_(&p, &r__1, &u[p * u_dim1 + 1], &c__1);
+/* L3054: */
+		}
+		spocon_("U", n, &u[u_offset], ldu, &c_b35, &temp1, &work[*n + 
+			1], &iwork[(*n << 1) + *m + 1], &ierr);
+	    } else {
+		slacpy_("U", n, n, &a[a_offset], lda, &work[*n + 1], n);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    temp1 = sva[iwork[p]];
+		    r__1 = 1.f / temp1;
+		    sscal_(&p, &r__1, &work[*n + (p - 1) * *n + 1], &c__1);
+/* L3052: */
+		}
+		spocon_("U", n, &work[*n + 1], n, &c_b35, &temp1, &work[*n + *
+			n * *n + 1], &iwork[(*n << 1) + *m + 1], &ierr);
+	    }
+	    sconda = 1.f / sqrt(temp1);
+/*           SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1). */
+/*           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */
+	} else {
+	    sconda = -1.f;
+	}
+    }
+
+    l2pert = l2pert && (r__1 = a[a_dim1 + 1] / a[nr + nr * a_dim1], abs(r__1))
+	     > sqrt(big1);
+/*     If there is no violent scaling, artificial perturbation is not needed. */
+
+/*     Phase 3: */
+
+    if (! (rsvec || lsvec)) {
+
+/*         Singular Values only */
+
+/* Computing MIN */
+	i__2 = *n - 1;
+	i__1 = f2cmin(i__2,nr);
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *n - p;
+	    scopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * 
+		    a_dim1], &c__1);
+/* L1946: */
+	}
+
+/*        The following two DO-loops introduce small relative perturbation */
+/*        into the strict upper triangle of the lower triangular matrix. */
+/*        Small entries below the main diagonal are also changed. */
+/*        This modification is useful if the computing environment does not */
+/*        provide/allow FLUSH TO ZERO underflow, for it prevents many */
+/*        annoying denormalized numbers in case of strongly scaled matrices. */
+/*        The perturbation is structured so that it does not introduce any */
+/*        new perturbation of the singular values, and it does not destroy */
+/*        the job done by the preconditioner. */
+/*        The licence for this perturbation is in the variable L2PERT, which */
+/*        should be .FALSE. if FLUSH TO ZERO underflow is active. */
+
+	if (! almort) {
+
+	    if (l2pert) {
+/*              XSC = SQRT(SMALL) */
+		xsc = epsln / (real) (*n);
+		i__1 = nr;
+		for (q = 1; q <= i__1; ++q) {
+		    temp1 = xsc * (r__1 = a[q + q * a_dim1], abs(r__1));
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			if (p > q && (r__1 = a[p + q * a_dim1], abs(r__1)) <= 
+				temp1 || p < q) {
+			    a[p + q * a_dim1] = r_sign(&temp1, &a[p + q * 
+				    a_dim1]);
+			}
+/* L4949: */
+		    }
+/* L4947: */
+		}
+	    } else {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 
+			1], lda);
+	    }
+
+
+	    i__1 = *lwork - *n;
+	    sgeqrf_(n, &nr, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1,
+		     &ierr);
+
+	    i__1 = nr - 1;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr - p;
+		scopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * 
+			a_dim1], &c__1);
+/* L1948: */
+	    }
+
+	}
+
+/*           Row-cyclic Jacobi SVD algorithm with column pivoting */
+
+/*           to drown denormals */
+	if (l2pert) {
+/*              XSC = SQRT(SMALL) */
+	    xsc = epsln / (real) (*n);
+	    i__1 = nr;
+	    for (q = 1; q <= i__1; ++q) {
+		temp1 = xsc * (r__1 = a[q + q * a_dim1], abs(r__1));
+		i__2 = nr;
+		for (p = 1; p <= i__2; ++p) {
+		    if (p > q && (r__1 = a[p + q * a_dim1], abs(r__1)) <= 
+			    temp1 || p < q) {
+			a[p + q * a_dim1] = r_sign(&temp1, &a[p + q * a_dim1])
+				;
+		    }
+/* L1949: */
+		}
+/* L1947: */
+	    }
+	} else {
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 1], 
+		    lda);
+	}
+
+/*           triangular matrix (plus perturbation which is ignored in */
+/*           the part which destroys triangular form (confusing?!)) */
+
+	sgesvj_("L", "NoU", "NoV", &nr, &nr, &a[a_offset], lda, &sva[1], n, &
+		v[v_offset], ldv, &work[1], lwork, info);
+
+	scalem = work[1];
+	numrank = i_nint(&work[2]);
+
+
+    } else if (rsvec && ! lsvec) {
+
+/*        -> Singular Values and Right Singular Vectors <- */
+
+	if (almort) {
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n - p + 1;
+		scopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], &
+			c__1);
+/* L1998: */
+	    }
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+		    1], ldv);
+
+	    sgesvj_("L", "U", "N", n, &nr, &v[v_offset], ldv, &sva[1], &nr, &
+		    a[a_offset], lda, &work[1], lwork, info);
+	    scalem = work[1];
+	    numrank = i_nint(&work[2]);
+	} else {
+
+/*        accumulated product of Jacobi rotations, three are perfect ) */
+
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    slaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &a[a_dim1 + 2], 
+		    lda);
+	    i__1 = *lwork - *n;
+	    sgelqf_(&nr, n, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1,
+		     &ierr);
+	    slacpy_("Lower", &nr, &nr, &a[a_offset], lda, &v[v_offset], ldv);
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+		    1], ldv);
+	    i__1 = *lwork - (*n << 1);
+	    sgeqrf_(&nr, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 
+		    1) + 1], &i__1, &ierr);
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr - p + 1;
+		scopy_(&i__2, &v[p + p * v_dim1], ldv, &v[p + p * v_dim1], &
+			c__1);
+/* L8998: */
+	    }
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+		    1], ldv);
+
+	    i__1 = *lwork - *n;
+	    sgesvj_("Lower", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[1], &
+		    nr, &u[u_offset], ldu, &work[*n + 1], &i__1, info);
+	    scalem = work[*n + 1];
+	    numrank = i_nint(&work[*n + 2]);
+	    if (nr < *n) {
+		i__1 = *n - nr;
+		slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], 
+			ldv);
+		i__1 = *n - nr;
+		slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 
+			+ 1], ldv);
+		i__1 = *n - nr;
+		i__2 = *n - nr;
+		slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 
+			1) * v_dim1], ldv);
+	    }
+
+	    i__1 = *lwork - *n;
+	    sormlq_("Left", "Transpose", n, n, &nr, &a[a_offset], lda, &work[
+		    1], &v[v_offset], ldv, &work[*n + 1], &i__1, &ierr);
+
+	}
+
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    scopy_(n, &v[p + v_dim1], ldv, &a[iwork[p] + a_dim1], lda);
+/* L8991: */
+	}
+	slacpy_("All", n, n, &a[a_offset], lda, &v[v_offset], ldv);
+
+	if (transp) {
+	    slacpy_("All", n, n, &v[v_offset], ldv, &u[u_offset], ldu);
+	}
+
+    } else if (lsvec && ! rsvec) {
+
+
+/*        Jacobi rotations in the Jacobi iterations. */
+	i__1 = nr;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *n - p + 1;
+	    scopy_(&i__2, &a[p + p * a_dim1], lda, &u[p + p * u_dim1], &c__1);
+/* L1965: */
+	}
+	i__1 = nr - 1;
+	i__2 = nr - 1;
+	slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], 
+		ldu);
+
+	i__1 = *lwork - (*n << 1);
+	sgeqrf_(n, &nr, &u[u_offset], ldu, &work[*n + 1], &work[(*n << 1) + 1]
+		, &i__1, &ierr);
+
+	i__1 = nr - 1;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = nr - p;
+	    scopy_(&i__2, &u[p + (p + 1) * u_dim1], ldu, &u[p + 1 + p * 
+		    u_dim1], &c__1);
+/* L1967: */
+	}
+	i__1 = nr - 1;
+	i__2 = nr - 1;
+	slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], 
+		ldu);
+
+	i__1 = *lwork - *n;
+	sgesvj_("Lower", "U", "N", &nr, &nr, &u[u_offset], ldu, &sva[1], &nr, 
+		&a[a_offset], lda, &work[*n + 1], &i__1, info);
+	scalem = work[*n + 1];
+	numrank = i_nint(&work[*n + 2]);
+
+	if (nr < *m) {
+	    i__1 = *m - nr;
+	    slaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu);
+	    if (nr < n1) {
+		i__1 = n1 - nr;
+		slaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * u_dim1 
+			+ 1], ldu);
+		i__1 = *m - nr;
+		i__2 = n1 - nr;
+		slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (nr + 
+			1) * u_dim1], ldu);
+	    }
+	}
+
+	i__1 = *lwork - *n;
+	sormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &u[
+		u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+
+	if (rowpiv) {
+	    i__1 = *m - 1;
+	    slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + 
+		    1], &c_n1);
+	}
+
+	i__1 = n1;
+	for (p = 1; p <= i__1; ++p) {
+	    xsc = 1.f / snrm2_(m, &u[p * u_dim1 + 1], &c__1);
+	    sscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1);
+/* L1974: */
+	}
+
+	if (transp) {
+	    slacpy_("All", n, n, &u[u_offset], ldu, &v[v_offset], ldv);
+	}
+
+    } else {
+
+
+	if (! jracc) {
+
+	    if (! almort) {
+
+/*           Second Preconditioning Step (QRF [with pivoting]) */
+/*           Note that the composition of TRANSPOSE, QRF and TRANSPOSE is */
+/*           equivalent to an LQF CALL. Since in many libraries the QRF */
+/*           seems to be better optimized than the LQF, we do explicit */
+/*           transpose and use the QRF. This is subject to changes in an */
+/*           optimized implementation of SGEJSV. */
+
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = *n - p + 1;
+		    scopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1],
+			     &c__1);
+/* L1968: */
+		}
+
+/*           denormals in the second QR factorization, where they are */
+/*           as good as zeros. This is done to avoid painfully slow */
+/*           computation with denormals. The relative size of the perturbation */
+/*           is a parameter that can be changed by the implementer. */
+/*           This perturbation device will be obsolete on machines with */
+/*           properly implemented arithmetic. */
+/*           To switch it off, set L2PERT=.FALSE. To remove it from  the */
+/*           code, remove the action under L2PERT=.TRUE., leave the ELSE part. */
+/*           The following two loops should be blocked and fused with the */
+/*           transposed copy above. */
+
+		if (l2pert) {
+		    xsc = sqrt(small);
+		    i__1 = nr;
+		    for (q = 1; q <= i__1; ++q) {
+			temp1 = xsc * (r__1 = v[q + q * v_dim1], abs(r__1));
+			i__2 = *n;
+			for (p = 1; p <= i__2; ++p) {
+			    if (p > q && (r__1 = v[p + q * v_dim1], abs(r__1))
+				     <= temp1 || p < q) {
+				v[p + q * v_dim1] = r_sign(&temp1, &v[p + q * 
+					v_dim1]);
+			    }
+			    if (p < q) {
+				v[p + q * v_dim1] = -v[p + q * v_dim1];
+			    }
+/* L2968: */
+			}
+/* L2969: */
+		    }
+		} else {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		}
+
+/*           Estimate the row scaled condition number of R1 */
+/*           (If R1 is rectangular, N > NR, then the condition number */
+/*           of the leading NR x NR submatrix is estimated.) */
+
+		slacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1]
+			, &nr);
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr - p + 1;
+		    temp1 = snrm2_(&i__2, &work[(*n << 1) + (p - 1) * nr + p],
+			     &c__1);
+		    i__2 = nr - p + 1;
+		    r__1 = 1.f / temp1;
+		    sscal_(&i__2, &r__1, &work[(*n << 1) + (p - 1) * nr + p], 
+			    &c__1);
+/* L3950: */
+		}
+		spocon_("Lower", &nr, &work[(*n << 1) + 1], &nr, &c_b35, &
+			temp1, &work[(*n << 1) + nr * nr + 1], &iwork[*m + (*
+			n << 1) + 1], &ierr);
+		condr1 = 1.f / sqrt(temp1);
+/*           R1 is OK for inverse <=> CONDR1 .LT. FLOAT(N) */
+/*           more conservative    <=> CONDR1 .LT. SQRT(FLOAT(N)) */
+
+		cond_ok__ = sqrt((real) nr);
+/* [TP]       COND_OK is a tuning parameter. */
+		if (condr1 < cond_ok__) {
+/*              implementation, this QRF should be implemented as the QRF */
+/*              of a lower triangular matrix. */
+/*              R1^t = Q2 * R2 */
+		    i__1 = *lwork - (*n << 1);
+		    sgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*
+			    n << 1) + 1], &i__1, &ierr);
+
+		    if (l2pert) {
+			xsc = sqrt(small) / epsln;
+			i__1 = nr;
+			for (p = 2; p <= i__1; ++p) {
+			    i__2 = p - 1;
+			    for (q = 1; q <= i__2; ++q) {
+/* Computing MIN */
+				r__3 = (r__1 = v[p + p * v_dim1], abs(r__1)), 
+					r__4 = (r__2 = v[q + q * v_dim1], abs(
+					r__2));
+				temp1 = xsc * f2cmin(r__3,r__4);
+				if ((r__1 = v[q + p * v_dim1], abs(r__1)) <= 
+					temp1) {
+				    v[q + p * v_dim1] = r_sign(&temp1, &v[q + 
+					    p * v_dim1]);
+				}
+/* L3958: */
+			    }
+/* L3959: */
+			}
+		    }
+
+		    if (nr != *n) {
+			slacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 
+				1) + 1], n);
+		    }
+
+		    i__1 = nr - 1;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = nr - p;
+			scopy_(&i__2, &v[p + (p + 1) * v_dim1], ldv, &v[p + 1 
+				+ p * v_dim1], &c__1);
+/* L1969: */
+		    }
+
+		    condr2 = condr1;
+
+		} else {
+
+/*              Note that windowed pivoting would be equally good */
+/*              numerically, and more run-time efficient. So, in */
+/*              an optimal implementation, the next call to SGEQP3 */
+/*              should be replaced with eg. CALL SGEQPX (ACM TOMS #782) */
+/*              with properly (carefully) chosen parameters. */
+
+/*              R1^t * P2 = Q2 * R2 */
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			iwork[*n + p] = 0;
+/* L3003: */
+		    }
+		    i__1 = *lwork - (*n << 1);
+		    sgeqp3_(n, &nr, &v[v_offset], ldv, &iwork[*n + 1], &work[*
+			    n + 1], &work[(*n << 1) + 1], &i__1, &ierr);
+/* *               CALL SGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), */
+/* *     $              LWORK-2*N, IERR ) */
+		    if (l2pert) {
+			xsc = sqrt(small);
+			i__1 = nr;
+			for (p = 2; p <= i__1; ++p) {
+			    i__2 = p - 1;
+			    for (q = 1; q <= i__2; ++q) {
+/* Computing MIN */
+				r__3 = (r__1 = v[p + p * v_dim1], abs(r__1)), 
+					r__4 = (r__2 = v[q + q * v_dim1], abs(
+					r__2));
+				temp1 = xsc * f2cmin(r__3,r__4);
+				if ((r__1 = v[q + p * v_dim1], abs(r__1)) <= 
+					temp1) {
+				    v[q + p * v_dim1] = r_sign(&temp1, &v[q + 
+					    p * v_dim1]);
+				}
+/* L3968: */
+			    }
+/* L3969: */
+			}
+		    }
+
+		    slacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 
+			    1], n);
+
+		    if (l2pert) {
+			xsc = sqrt(small);
+			i__1 = nr;
+			for (p = 2; p <= i__1; ++p) {
+			    i__2 = p - 1;
+			    for (q = 1; q <= i__2; ++q) {
+/* Computing MIN */
+				r__3 = (r__1 = v[p + p * v_dim1], abs(r__1)), 
+					r__4 = (r__2 = v[q + q * v_dim1], abs(
+					r__2));
+				temp1 = xsc * f2cmin(r__3,r__4);
+				v[p + q * v_dim1] = -r_sign(&temp1, &v[q + p *
+					 v_dim1]);
+/* L8971: */
+			    }
+/* L8970: */
+			}
+		    } else {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			slaset_("L", &i__1, &i__2, &c_b34, &c_b34, &v[v_dim1 
+				+ 2], ldv);
+		    }
+/*              Now, compute R2 = L3 * Q3, the LQ factorization. */
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    sgelqf_(&nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + *n 
+			    * nr + 1], &work[(*n << 1) + *n * nr + nr + 1], &
+			    i__1, &ierr);
+		    slacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) 
+			    + *n * nr + nr + 1], &nr);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			temp1 = snrm2_(&p, &work[(*n << 1) + *n * nr + nr + p]
+				, &nr);
+			r__1 = 1.f / temp1;
+			sscal_(&p, &r__1, &work[(*n << 1) + *n * nr + nr + p],
+				 &nr);
+/* L4950: */
+		    }
+		    spocon_("L", &nr, &work[(*n << 1) + *n * nr + nr + 1], &
+			    nr, &c_b35, &temp1, &work[(*n << 1) + *n * nr + 
+			    nr + nr * nr + 1], &iwork[*m + (*n << 1) + 1], &
+			    ierr);
+		    condr2 = 1.f / sqrt(temp1);
+
+		    if (condr2 >= cond_ok__) {
+/*                 (this overwrites the copy of R2, as it will not be */
+/*                 needed in this branch, but it does not overwritte the */
+/*                 Huseholder vectors of Q2.). */
+			slacpy_("U", &nr, &nr, &v[v_offset], ldv, &work[(*n <<
+				 1) + 1], n);
+/*                 WORK(2*N+N*NR+1:2*N+N*NR+N) */
+		    }
+
+		}
+
+		if (l2pert) {
+		    xsc = sqrt(small);
+		    i__1 = nr;
+		    for (q = 2; q <= i__1; ++q) {
+			temp1 = xsc * v[q + q * v_dim1];
+			i__2 = q - 1;
+			for (p = 1; p <= i__2; ++p) {
+/*                    V(p,q) = - SIGN( TEMP1, V(q,p) ) */
+			    v[p + q * v_dim1] = -r_sign(&temp1, &v[p + q * 
+				    v_dim1]);
+/* L4969: */
+			}
+/* L4968: */
+		    }
+		} else {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		}
+
+/*        Second preconditioning finished; continue with Jacobi SVD */
+/*        The input matrix is lower trinagular. */
+
+/*        Recover the right singular vectors as solution of a well */
+/*        conditioned triangular matrix equation. */
+
+		if (condr1 < cond_ok__) {
+
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    sgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[
+			    1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n *
+			     nr + nr + 1], &i__1, info);
+		    scalem = work[(*n << 1) + *n * nr + nr + 1];
+		    numrank = i_nint(&work[(*n << 1) + *n * nr + nr + 2]);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			scopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 
+				+ 1], &c__1);
+			sscal_(&nr, &sva[p], &v[p * v_dim1 + 1], &c__1);
+/* L3970: */
+		    }
+
+		    if (nr == *n) {
+/* :))             .. best case, R1 is inverted. The solution of this matrix */
+/*                 equation is Q2*V2 = the product of the Jacobi rotations */
+/*                 used in SGESVJ, premultiplied with the orthogonal matrix */
+/*                 from the second QR factorization. */
+			strsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &a[
+				a_offset], lda, &v[v_offset], ldv);
+		    } else {
+/*                 is inverted to get the product of the Jacobi rotations */
+/*                 used in SGESVJ. The Q-factor from the second QR */
+/*                 factorization is then built in explicitly. */
+			strsm_("L", "U", "T", "N", &nr, &nr, &c_b35, &work[(*
+				n << 1) + 1], n, &v[v_offset], ldv);
+			if (nr < *n) {
+			    i__1 = *n - nr;
+			    slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 
+				    1 + v_dim1], ldv);
+			    i__1 = *n - nr;
+			    slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 
+				    1) * v_dim1 + 1], ldv);
+			    i__1 = *n - nr;
+			    i__2 = *n - nr;
+			    slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr 
+				    + 1 + (nr + 1) * v_dim1], ldv);
+			}
+			i__1 = *lwork - (*n << 1) - *n * nr - nr;
+			sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, 
+				&work[*n + 1], &v[v_offset], ldv, &work[(*n <<
+				 1) + *n * nr + nr + 1], &i__1, &ierr);
+		    }
+
+		} else if (condr2 < cond_ok__) {
+
+/* :)           .. the input matrix A is very likely a relative of */
+/*              the Kahan matrix :) */
+/*              The matrix R2 is inverted. The solution of the matrix equation */
+/*              is Q3^T*V3 = the product of the Jacobi rotations (appplied to */
+/*              the lower triangular L3 from the LQ factorization of */
+/*              R2=L3*Q3), pre-multiplied with the transposed Q3. */
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    sgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[
+			    1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n *
+			     nr + nr + 1], &i__1, info);
+		    scalem = work[(*n << 1) + *n * nr + nr + 1];
+		    numrank = i_nint(&work[(*n << 1) + *n * nr + nr + 2]);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			scopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 
+				+ 1], &c__1);
+			sscal_(&nr, &sva[p], &u[p * u_dim1 + 1], &c__1);
+/* L3870: */
+		    }
+		    strsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &work[(*n << 
+			    1) + 1], n, &u[u_offset], ldu);
+		    i__1 = nr;
+		    for (q = 1; q <= i__1; ++q) {
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = 
+				    u[p + q * u_dim1];
+/* L872: */
+			}
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr 
+				    + p];
+/* L874: */
+			}
+/* L873: */
+		    }
+		    if (nr < *n) {
+			i__1 = *n - nr;
+			slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + 
+				v_dim1], ldv);
+			i__1 = *n - nr;
+			slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) *
+				 v_dim1 + 1], ldv);
+			i__1 = *n - nr;
+			i__2 = *n - nr;
+			slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 
+				+ (nr + 1) * v_dim1], ldv);
+		    }
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &
+			    work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) 
+			    + *n * nr + nr + 1], &i__1, &ierr);
+		} else {
+/*              Last line of defense. */
+/* #:(          This is a rather pathological case: no scaled condition */
+/*              improvement after two pivoted QR factorizations. Other */
+/*              possibility is that the rank revealing QR factorization */
+/*              or the condition estimator has failed, or the COND_OK */
+/*              is set very close to ONE (which is unnecessary). Normally, */
+/*              this branch should never be executed, but in rare cases of */
+/*              failure of the RRQR or condition estimator, the last line of */
+/*              defense ensures that SGEJSV completes the task. */
+/*              Compute the full SVD of L3 using SGESVJ with explicit */
+/*              accumulation of Jacobi rotations. */
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    sgesvj_("L", "U", "V", &nr, &nr, &v[v_offset], ldv, &sva[
+			    1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n *
+			     nr + nr + 1], &i__1, info);
+		    scalem = work[(*n << 1) + *n * nr + nr + 1];
+		    numrank = i_nint(&work[(*n << 1) + *n * nr + nr + 2]);
+		    if (nr < *n) {
+			i__1 = *n - nr;
+			slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + 
+				v_dim1], ldv);
+			i__1 = *n - nr;
+			slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) *
+				 v_dim1 + 1], ldv);
+			i__1 = *n - nr;
+			i__2 = *n - nr;
+			slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 
+				+ (nr + 1) * v_dim1], ldv);
+		    }
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &
+			    work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) 
+			    + *n * nr + nr + 1], &i__1, &ierr);
+
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    sormlq_("L", "T", &nr, &nr, &nr, &work[(*n << 1) + 1], n, 
+			    &work[(*n << 1) + *n * nr + 1], &u[u_offset], ldu,
+			     &work[(*n << 1) + *n * nr + nr + 1], &i__1, &
+			    ierr);
+		    i__1 = nr;
+		    for (q = 1; q <= i__1; ++q) {
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = 
+				    u[p + q * u_dim1];
+/* L772: */
+			}
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr 
+				    + p];
+/* L774: */
+			}
+/* L773: */
+		    }
+
+		}
+
+/*           Permute the rows of V using the (column) permutation from the */
+/*           first QRF. Also, scale the columns to make them unit in */
+/*           Euclidean norm. This applies to all cases. */
+
+		temp1 = sqrt((real) (*n)) * epsln;
+		i__1 = *n;
+		for (q = 1; q <= i__1; ++q) {
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * 
+				v_dim1];
+/* L972: */
+		    }
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p]
+				;
+/* L973: */
+		    }
+		    xsc = 1.f / snrm2_(n, &v[q * v_dim1 + 1], &c__1);
+		    if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) {
+			sscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1);
+		    }
+/* L1972: */
+		}
+/*           At this moment, V contains the right singular vectors of A. */
+/*           Next, assemble the left singular vector matrix U (M x N). */
+		if (nr < *m) {
+		    i__1 = *m - nr;
+		    slaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr < n1) {
+			i__1 = n1 - nr;
+			slaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) *
+				 u_dim1 + 1], ldu);
+			i__1 = *m - nr;
+			i__2 = n1 - nr;
+			slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 
+				+ (nr + 1) * u_dim1], ldu);
+		    }
+		}
+
+/*           The Q matrix from the first QRF is built into the left singular */
+/*           matrix U. This applies to all cases. */
+
+		i__1 = *lwork - *n;
+		sormqr_("Left", "No_Tr", m, &n1, n, &a[a_offset], lda, &work[
+			1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+/*           The columns of U are normalized. The cost is O(M*N) flops. */
+		temp1 = sqrt((real) (*m)) * epsln;
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    xsc = 1.f / snrm2_(m, &u[p * u_dim1 + 1], &c__1);
+		    if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) {
+			sscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1);
+		    }
+/* L1973: */
+		}
+
+/*           If the initial QRF is computed with row pivoting, the left */
+/*           singular vectors must be adjusted. */
+
+		if (rowpiv) {
+		    i__1 = *m - 1;
+		    slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n 
+			    << 1) + 1], &c_n1);
+		}
+
+	    } else {
+
+/*        the second QRF is not needed */
+
+		slacpy_("Upper", n, n, &a[a_offset], lda, &work[*n + 1], n);
+		if (l2pert) {
+		    xsc = sqrt(small);
+		    i__1 = *n;
+		    for (p = 2; p <= i__1; ++p) {
+			temp1 = xsc * work[*n + (p - 1) * *n + p];
+			i__2 = p - 1;
+			for (q = 1; q <= i__2; ++q) {
+			    work[*n + (q - 1) * *n + p] = -r_sign(&temp1, &
+				    work[*n + (p - 1) * *n + q]);
+/* L5971: */
+			}
+/* L5970: */
+		    }
+		} else {
+		    i__1 = *n - 1;
+		    i__2 = *n - 1;
+		    slaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &work[*n + 
+			    2], n);
+		}
+
+		i__1 = *lwork - *n - *n * *n;
+		sgesvj_("Upper", "U", "N", n, n, &work[*n + 1], n, &sva[1], n,
+			 &u[u_offset], ldu, &work[*n + *n * *n + 1], &i__1, 
+			info);
+
+		scalem = work[*n + *n * *n + 1];
+		numrank = i_nint(&work[*n + *n * *n + 2]);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    scopy_(n, &work[*n + (p - 1) * *n + 1], &c__1, &u[p * 
+			    u_dim1 + 1], &c__1);
+		    sscal_(n, &sva[p], &work[*n + (p - 1) * *n + 1], &c__1);
+/* L6970: */
+		}
+
+		strsm_("Left", "Upper", "NoTrans", "No UD", n, n, &c_b35, &a[
+			a_offset], lda, &work[*n + 1], n);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    scopy_(n, &work[*n + p], n, &v[iwork[p] + v_dim1], ldv);
+/* L6972: */
+		}
+		temp1 = sqrt((real) (*n)) * epsln;
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    xsc = 1.f / snrm2_(n, &v[p * v_dim1 + 1], &c__1);
+		    if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) {
+			sscal_(n, &xsc, &v[p * v_dim1 + 1], &c__1);
+		    }
+/* L6971: */
+		}
+
+/*           Assemble the left singular vector matrix U (M x N). */
+
+		if (*n < *m) {
+		    i__1 = *m - *n;
+		    slaset_("A", &i__1, n, &c_b34, &c_b34, &u[*n + 1 + u_dim1]
+			    , ldu);
+		    if (*n < n1) {
+			i__1 = n1 - *n;
+			slaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * 
+				u_dim1 + 1], ldu);
+			i__1 = *m - *n;
+			i__2 = n1 - *n;
+			slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[*n + 1 
+				+ (*n + 1) * u_dim1], ldu);
+		    }
+		}
+		i__1 = *lwork - *n;
+		sormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[
+			1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+		temp1 = sqrt((real) (*m)) * epsln;
+		i__1 = n1;
+		for (p = 1; p <= i__1; ++p) {
+		    xsc = 1.f / snrm2_(m, &u[p * u_dim1 + 1], &c__1);
+		    if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) {
+			sscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1);
+		    }
+/* L6973: */
+		}
+
+		if (rowpiv) {
+		    i__1 = *m - 1;
+		    slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n 
+			    << 1) + 1], &c_n1);
+		}
+
+	    }
+
+/*        end of the  >> almost orthogonal case <<  in the full SVD */
+
+	} else {
+
+/*        This branch deploys a preconditioned Jacobi SVD with explicitly */
+/*        accumulated rotations. It is included as optional, mainly for */
+/*        experimental purposes. It does perform well, and can also be used. */
+/*        In this implementation, this branch will be automatically activated */
+/*        if the  condition number sigma_max(A) / sigma_min(A) is predicted */
+/*        to be greater than the overflow threshold. This is because the */
+/*        a posteriori computation of the singular vectors assumes robust */
+/*        implementation of BLAS and some LAPACK procedures, capable of working */
+/*        in presence of extreme values. Since that is not always the case, ... */
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n - p + 1;
+		scopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], &
+			c__1);
+/* L7968: */
+	    }
+
+	    if (l2pert) {
+		xsc = sqrt(small / epsln);
+		i__1 = nr;
+		for (q = 1; q <= i__1; ++q) {
+		    temp1 = xsc * (r__1 = v[q + q * v_dim1], abs(r__1));
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			if (p > q && (r__1 = v[p + q * v_dim1], abs(r__1)) <= 
+				temp1 || p < q) {
+			    v[p + q * v_dim1] = r_sign(&temp1, &v[p + q * 
+				    v_dim1]);
+			}
+			if (p < q) {
+			    v[p + q * v_dim1] = -v[p + q * v_dim1];
+			}
+/* L5968: */
+		    }
+/* L5969: */
+		}
+	    } else {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+			1], ldv);
+	    }
+	    i__1 = *lwork - (*n << 1);
+	    sgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 1) 
+		    + 1], &i__1, &ierr);
+	    slacpy_("L", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n);
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr - p + 1;
+		scopy_(&i__2, &v[p + p * v_dim1], ldv, &u[p + p * u_dim1], &
+			c__1);
+/* L7969: */
+	    }
+	    if (l2pert) {
+		xsc = sqrt(small / epsln);
+		i__1 = nr;
+		for (q = 2; q <= i__1; ++q) {
+		    i__2 = q - 1;
+		    for (p = 1; p <= i__2; ++p) {
+/* Computing MIN */
+			r__3 = (r__1 = u[p + p * u_dim1], abs(r__1)), r__4 = (
+				r__2 = u[q + q * u_dim1], abs(r__2));
+			temp1 = xsc * f2cmin(r__3,r__4);
+			u[p + q * u_dim1] = -r_sign(&temp1, &u[q + p * u_dim1]
+				);
+/* L9971: */
+		    }
+/* L9970: */
+		}
+	    } else {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 
+			1], ldu);
+	    }
+	    i__1 = *lwork - (*n << 1) - *n * nr;
+	    sgesvj_("L", "U", "V", &nr, &nr, &u[u_offset], ldu, &sva[1], n, &
+		    v[v_offset], ldv, &work[(*n << 1) + *n * nr + 1], &i__1, 
+		    info);
+	    scalem = work[(*n << 1) + *n * nr + 1];
+	    numrank = i_nint(&work[(*n << 1) + *n * nr + 2]);
+	    if (nr < *n) {
+		i__1 = *n - nr;
+		slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], 
+			ldv);
+		i__1 = *n - nr;
+		slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 
+			+ 1], ldv);
+		i__1 = *n - nr;
+		i__2 = *n - nr;
+		slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 
+			1) * v_dim1], ldv);
+	    }
+	    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+	    sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &work[*n + 
+		    1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1]
+		    , &i__1, &ierr);
+
+/*           Permute the rows of V using the (column) permutation from the */
+/*           first QRF. Also, scale the columns to make them unit in */
+/*           Euclidean norm. This applies to all cases. */
+
+	    temp1 = sqrt((real) (*n)) * epsln;
+	    i__1 = *n;
+	    for (q = 1; q <= i__1; ++q) {
+		i__2 = *n;
+		for (p = 1; p <= i__2; ++p) {
+		    work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * 
+			    v_dim1];
+/* L8972: */
+		}
+		i__2 = *n;
+		for (p = 1; p <= i__2; ++p) {
+		    v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p];
+/* L8973: */
+		}
+		xsc = 1.f / snrm2_(n, &v[q * v_dim1 + 1], &c__1);
+		if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) {
+		    sscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1);
+		}
+/* L7972: */
+	    }
+
+/*           At this moment, V contains the right singular vectors of A. */
+/*           Next, assemble the left singular vector matrix U (M x N). */
+
+	    if (nr < *m) {
+		i__1 = *m - nr;
+		slaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], 
+			ldu);
+		if (nr < n1) {
+		    i__1 = n1 - nr;
+		    slaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * 
+			    u_dim1 + 1], ldu);
+		    i__1 = *m - nr;
+		    i__2 = n1 - nr;
+		    slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (
+			    nr + 1) * u_dim1], ldu);
+		}
+	    }
+
+	    i__1 = *lwork - *n;
+	    sormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &
+		    u[u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+
+	    if (rowpiv) {
+		i__1 = *m - 1;
+		slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1)
+			 + 1], &c_n1);
+	    }
+
+
+	}
+	if (transp) {
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		sswap_(n, &u[p * u_dim1 + 1], &c__1, &v[p * v_dim1 + 1], &
+			c__1);
+/* L6974: */
+	    }
+	}
+
+    }
+/*     end of the full SVD */
+
+/*     Undo scaling, if necessary (and possible) */
+
+    if (uscal2 <= big / sva[1] * uscal1) {
+	slascl_("G", &c__0, &c__0, &uscal1, &uscal2, &nr, &c__1, &sva[1], n, &
+		ierr);
+	uscal1 = 1.f;
+	uscal2 = 1.f;
+    }
+
+    if (nr < *n) {
+	i__1 = *n;
+	for (p = nr + 1; p <= i__1; ++p) {
+	    sva[p] = 0.f;
+/* L3004: */
+	}
+    }
+
+    work[1] = uscal2 * scalem;
+    work[2] = uscal1;
+    if (errest) {
+	work[3] = sconda;
+    }
+    if (lsvec && rsvec) {
+	work[4] = condr1;
+	work[5] = condr2;
+    }
+    if (l2tran) {
+	work[6] = entra;
+	work[7] = entrat;
+    }
+
+    iwork[1] = nr;
+    iwork[2] = numrank;
+    iwork[3] = warning;
+
+    return 0;
+} /* sgejsv_ */
+
diff --git a/lapack-netlib/SRC/sgelq.c b/lapack-netlib/SRC/sgelq.c
new file mode 100644
index 000000000..a135a90ee
--- /dev/null
+++ b/lapack-netlib/SRC/sgelq.c
@@ -0,0 +1,742 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+
+/* > \brief \b SGELQ */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
+/*                         INFO ) */
+
+/*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
+/*       REAL              A( LDA, * ), T( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGELQ computes an LQ factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = ( L 0 ) *  Q */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a N-by-N orthogonal matrix; */
+/* >    L is a lower-triangular M-by-M matrix; */
+/* >    0 is a M-by-(N-M) zero matrix, if M < N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the M-by-f2cmin(M,N) lower trapezoidal matrix L */
+/* >          (L is lower triangular if M <= N); */
+/* >          the elements above the diagonal are used to store part of the */
+/* >          data structure to represent Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (MAX(5,TSIZE)) */
+/* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
+/* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
+/* >          Remaining T contains part of the data structure used to represent Q. */
+/* >          If one wants to apply or construct Q, then one needs to keep T */
+/* >          (in addition to A) and pass it to further subroutines. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          If TSIZE >= 5, the dimension of the array T. */
+/* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If TSIZE = -1, the routine calculates optimal size of T for the */
+/* >          optimum performance and returns this value in T(1). */
+/* >          If TSIZE = -2, the routine calculates minimal size of T and */
+/* >          returns this value in T(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
+/* >          or optimal, if query was assumed) LWORK. */
+/* >          See LWORK for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
+/* >          optimal performance and returns this value in WORK(1). */
+/* >          If LWORK = -2, the routine calculates minimal size of WORK and */
+/* >          returns this value in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The goal of the interface is to give maximum freedom to the developers for */
+/* > creating any LQ factorization algorithm they wish. The triangular */
+/* > (trapezoidal) L has to be stored in the lower part of A. The lower part of A */
+/* > and the array T can be used to store any relevant information for applying or */
+/* > constructing the Q factor. The WORK array can safely be discarded after exit. */
+/* > */
+/* > Caution: One should not expect the sizes of T and WORK to be the same from one */
+/* > LAPACK implementation to the other, or even from one execution to the other. */
+/* > A workspace query (for T and WORK) is needed at each execution. However, */
+/* > for a given execution, the size of T and WORK are fixed and will not change */
+/* > from one query to the next. */
+/* > */
+/* > \endverbatim */
+/* > */
+/* > \par Further Details particular to this LAPACK implementation: */
+/*  ============================================================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           SLASWLQ or SGELQT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, SGELQ will use either */
+/* >  SLASWLQ (if the matrix is short-and-wide) or SGELQT to compute */
+/* >  the LQ factorization. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgelq_(integer *m, integer *n, real *a, integer *lda, 
+	real *t, integer *tsize, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    logical mint, minw;
+    integer lwmin, lwreq, lwopt, mb, nb, nblcks;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sgelqt_(integer *, integer *, integer *, real 
+	    *, integer *, real *, integer *, real *, integer *);
+    logical lminws, lquery;
+    integer mintsz;
+    extern /* Subroutine */ int slaswlq_(integer *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , integer *);
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2;
+
+    mint = FALSE_;
+    minw = FALSE_;
+    if (*tsize == -2 || *lwork == -2) {
+	if (*tsize != -1) {
+	    mint = TRUE_;
+	}
+	if (*lwork != -1) {
+	    minw = TRUE_;
+	}
+    }
+
+/*     Determine the block size */
+
+    if (f2cmin(*m,*n) > 0) {
+	mb = ilaenv_(&c__1, "SGELQ ", " ", m, n, &c__1, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	nb = ilaenv_(&c__1, "SGELQ ", " ", m, n, &c__2, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+    } else {
+	mb = 1;
+	nb = *n;
+    }
+    if (mb > f2cmin(*m,*n) || mb < 1) {
+	mb = 1;
+    }
+    if (nb > *n || nb <= *m) {
+	nb = *n;
+    }
+    mintsz = *m + 5;
+    if (nb > *m && *n > *m) {
+	if ((*n - *m) % (nb - *m) == 0) {
+	    nblcks = (*n - *m) / (nb - *m);
+	} else {
+	    nblcks = (*n - *m) / (nb - *m) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+/*     Determine if the workspace size satisfies minimal size */
+
+    if (*n <= *m || nb <= *m || nb >= *n) {
+	lwmin = f2cmax(1,*n);
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *n;
+	lwopt = f2cmax(i__1,i__2);
+    } else {
+	lwmin = f2cmax(1,*m);
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m;
+	lwopt = f2cmax(i__1,i__2);
+    }
+    lminws = FALSE_;
+/* Computing MAX */
+    i__1 = 1, i__2 = mb * *m * nblcks + 5;
+    if ((*tsize < f2cmax(i__1,i__2) || *lwork < lwopt) && *lwork >= lwmin && *
+	    tsize >= mintsz && ! lquery) {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2)) {
+	    lminws = TRUE_;
+	    mb = 1;
+	    nb = *n;
+	}
+	if (*lwork < lwopt) {
+	    lminws = TRUE_;
+	    mb = 1;
+	}
+    }
+    if (*n <= *m || nb <= *m || nb >= *n) {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *n;
+	lwreq = f2cmax(i__1,i__2);
+    } else {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m;
+	lwreq = f2cmax(i__1,i__2);
+    }
+
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
+	    *info = -6;
+	} else if (*lwork < lwreq && ! lquery && ! lminws) {
+	    *info = -8;
+	}
+    }
+
+    if (*info == 0) {
+	if (mint) {
+	    t[1] = (real) mintsz;
+	} else {
+	    t[1] = (real) (mb * *m * nblcks + 5);
+	}
+	t[2] = (real) mb;
+	t[3] = (real) nb;
+	if (minw) {
+	    work[1] = (real) lwmin;
+	} else {
+	    work[1] = (real) lwreq;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELQ", &i__1, (ftnlen)5);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (f2cmin(*m,*n) == 0) {
+	return 0;
+    }
+
+/*     The LQ Decomposition */
+
+    if (*n <= *m || nb <= *m || nb >= *n) {
+	sgelqt_(m, n, &mb, &a[a_offset], lda, &t[6], &mb, &work[1], info);
+    } else {
+	slaswlq_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &mb, &work[1], 
+		lwork, info);
+    }
+
+    work[1] = (real) lwreq;
+    return 0;
+
+/*     End of SGELQ */
+
+} /* sgelq_ */
+
diff --git a/lapack-netlib/SRC/sgelq2.c b/lapack-netlib/SRC/sgelq2.c
new file mode 100644
index 000000000..d0ac5feb0
--- /dev/null
+++ b/lapack-netlib/SRC/sgelq2.c
@@ -0,0 +1,597 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorit
+hm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGELQ2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelq2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelq2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelq2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGELQ2 computes an LQ factorization of a real m-by-n matrix A: */
+/* > */
+/* >    A = ( L 0 ) *  Q */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a n-by-n orthogonal matrix; */
+/* >    L is an lower-triangular m-by-m matrix; */
+/* >    0 is a m-by-(n-m) zero matrix, if m < n. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the m by f2cmin(m,n) lower trapezoidal matrix L (L is */
+/* >          lower triangular if m <= n); the elements above the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (M) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__, k;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
+	    integer *, real *);
+    real aii;
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELQ2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    k = f2cmin(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
+
+	i__2 = *n - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * a_dim1]
+		, lda, &tau[i__]);
+	if (i__ < *m) {
+
+/*           Apply H(i) to A(i+1:m,i:n) from the right */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+	    slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[
+		    i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGELQ2 */
+
+} /* sgelq2_ */
+
diff --git a/lapack-netlib/SRC/sgelqf.c b/lapack-netlib/SRC/sgelqf.c
new file mode 100644
index 000000000..4488f3edc
--- /dev/null
+++ b/lapack-netlib/SRC/sgelqf.c
@@ -0,0 +1,698 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b SGELQF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGELQF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelqf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelqf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelqf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGELQF computes an LQ factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = ( L 0 ) *  Q */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a N-by-N orthogonal matrix; */
+/* >    L is an lower-triangular M-by-M matrix; */
+/* >    0 is a M-by-(N-M) zero matrix, if M < N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the m-by-f2cmin(m,n) lower trapezoidal matrix L (L is */
+/* >          lower triangular if m <= n); the elements above the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,M). */
+/* >          For optimum performance LWORK >= M*NB, where NB is the */
+/* >          optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo;
+    extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *);
+    integer ib, nb, nx;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iws;
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *m * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*lwork < f2cmax(1,*m) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELQF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the LQ factorization of the current block */
+/*           A(i:i+ib-1,i:n) */
+
+	    i__3 = *n - i__ + 1;
+	    sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *m) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
+
+		i__3 = *n - i__ + 1;
+		slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * 
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i+ib:m,i:n) from the right */
+
+		i__3 = *m - i__ - ib + 1;
+		i__4 = *n - i__ + 1;
+		slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, 
+			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 
+			1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGELQF */
+
+} /* sgelqf_ */
+
diff --git a/lapack-netlib/SRC/sgelqt.c b/lapack-netlib/SRC/sgelqt.c
new file mode 100644
index 000000000..149cbc21a
--- /dev/null
+++ b/lapack-netlib/SRC/sgelqt.c
@@ -0,0 +1,601 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGELQT */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) */
+
+/*       INTEGER   INFO, LDA, LDT, M, N, MB */
+/*       REAL      A( LDA, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELQT computes a blocked LQ factorization of a real M-by-N matrix A */
+/* > using the compact WY representation of Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MB */
+/* > \verbatim */
+/* >          MB is INTEGER */
+/* >          The block size to be used in the blocked QR.  MIN(M,N) >= MB >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is */
+/* >          lower triangular if M <= N); the elements above the diagonal */
+/* >          are the rows of V. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,MIN(M,N)) */
+/* >          The upper triangular block reflectors stored in compact form */
+/* >          as a sequence of upper triangular blocks.  See below */
+/* >          for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= MB. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MB*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
+/* >  above the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1  v1 v1 v1 v1 ) */
+/* >                   (     1  v2 v2 v2 ) */
+/* >                   (         1 v3 v3 ) */
+/* > */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
+/* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/MB), where each */
+/* >  block is of order MB except for the last block, which is of order */
+/* >  IB = K - (B-1)*MB.  For each of the B blocks, a upper triangular block */
+/* >  reflector factor is computed: T1, T2, ..., TB.  The MB-by-MB (and IB-by-IB */
+/* >  for the last block) T's are stored in the MB-by-K matrix T as */
+/* > */
+/* >               T = (T1 T2 ... TB). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgelqt_(integer *m, integer *n, integer *mb, real *a, 
+	integer *lda, real *t, integer *ldt, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    integer i__, k, iinfo, ib;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen), sgelqt3_(
+	    integer *, integer *, real *, integer *, real *, integer *, 
+	    integer *);
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/* ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*mb < 1 || *mb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldt < *mb) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELQT", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	return 0;
+    }
+
+/*     Blocked loop of length K */
+
+    i__1 = k;
+    i__2 = *mb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	i__3 = k - i__ + 1;
+	ib = f2cmin(i__3,*mb);
+
+/*     Compute the LQ factorization of the current block A(I:M,I:I+IB-1) */
+
+	i__3 = *n - i__ + 1;
+	sgelqt3_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + 1]
+		, ldt, &iinfo);
+	if (i__ + ib <= *m) {
+
+/*     Update by applying H**T to A(I:M,I+IB:N) from the right */
+
+	    i__3 = *m - i__ - ib + 1;
+	    i__4 = *n - i__ + 1;
+	    i__5 = *m - i__ - ib + 1;
+	    slarfb_("R", "N", "F", "R", &i__3, &i__4, &ib, &a[i__ + i__ * 
+		    a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + ib + 
+		    i__ * a_dim1], lda, &work[1], &i__5);
+	}
+    }
+    return 0;
+
+/*     End of SGELQT */
+
+} /* sgelqt_ */
+
diff --git a/lapack-netlib/SRC/sgelqt3.c b/lapack-netlib/SRC/sgelqt3.c
new file mode 100644
index 000000000..9601beac2
--- /dev/null
+++ b/lapack-netlib/SRC/sgelqt3.c
@@ -0,0 +1,657 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static real c_b7 = 1.f;
+static real c_b19 = -1.f;
+
+/* > \brief \b SGELQT3 */
+
+/*  Definition: */
+/*  =========== */
+
+/*        SUBROUTINE SGELQT3( M, N, A, LDA, T, LDT, INFO ) */
+
+/*       INTEGER   INFO, LDA, M, N, LDT */
+/*       REAL   A( LDA, * ), T( LDT, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELQT3 recursively computes a LQ factorization of a real M-by-N */
+/* > matrix A, using the compact WY representation of Q. */
+/* > */
+/* > Based on the algorithm of Elmroth and Gustavson, */
+/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M =< N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
+/* >          below the diagonal contain the N-by-N lower triangular matrix L; the */
+/* >          elements above the diagonal are the rows of V.  See below for */
+/* >          further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,N) */
+/* >          The N-by-N upper triangular factor of the block reflector. */
+/* >          The elements on and above the diagonal contain the block */
+/* >          reflector T; the elements below the diagonal are not used. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
+/* >  above the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1  v1 v1 v1 v1 ) */
+/* >                   (     1  v2 v2 v2 ) */
+/* >                   (     1  v3 v3 v3 ) */
+/* > */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
+/* >  block reflector H is then given by */
+/* > */
+/* >               H = I - V * T * V**T */
+/* > */
+/* >  where V**T is the transpose of V. */
+/* > */
+/* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgelqt3_(integer *m, integer *n, real *a, integer *lda, 
+	real *t, integer *ldt, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2;
+
+    /* Local variables */
+    integer i__, j, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *);
+    integer i1, j1, m1, m2;
+    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), xerbla_(char *, integer *, ftnlen), slarfg_(integer *, real *, real *, integer *, real *);
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*ldt < f2cmax(1,*m)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELQT3", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    if (*m == 1) {
+
+/*        Compute Householder transform when N=1 */
+
+	slarfg_(n, &a[a_offset], &a[f2cmin(2,*n) * a_dim1 + 1], lda, &t[t_offset]
+		);
+
+    } else {
+
+/*        Otherwise, split A into blocks... */
+
+	m1 = *m / 2;
+	m2 = *m - m1;
+/* Computing MIN */
+	i__1 = m1 + 1;
+	i1 = f2cmin(i__1,*m);
+/* Computing MIN */
+	i__1 = *m + 1;
+	j1 = f2cmin(i__1,*n);
+
+/*        Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */
+
+	sgelqt3_(&m1, n, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);
+
+/*        Compute A(J1:M,1:N) = Q1^H A(J1:M,1:N) [workspace: T(1:N1,J1:N)] */
+
+	i__1 = m2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = m1;
+	    for (j = 1; j <= i__2; ++j) {
+		t[i__ + m1 + j * t_dim1] = a[i__ + m1 + j * a_dim1];
+	    }
+	}
+	strmm_("R", "U", "T", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
+		+ t_dim1], ldt);
+
+	i__1 = *n - m1;
+	sgemm_("N", "T", &m2, &m1, &i__1, &c_b7, &a[i1 + i1 * a_dim1], lda, &
+		a[i1 * a_dim1 + 1], lda, &c_b7, &t[i1 + t_dim1], ldt);
+
+	strmm_("R", "U", "N", "N", &m2, &m1, &c_b7, &t[t_offset], ldt, &t[i1 
+		+ t_dim1], ldt);
+
+	i__1 = *n - m1;
+	sgemm_("N", "N", &m2, &i__1, &m1, &c_b19, &t[i1 + t_dim1], ldt, &a[i1 
+		* a_dim1 + 1], lda, &c_b7, &a[i1 + i1 * a_dim1], lda);
+
+	strmm_("R", "U", "N", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
+		+ t_dim1], ldt);
+
+	i__1 = m2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = m1;
+	    for (j = 1; j <= i__2; ++j) {
+		a[i__ + m1 + j * a_dim1] -= t[i__ + m1 + j * t_dim1];
+		t[i__ + m1 + j * t_dim1] = 0.f;
+	    }
+	}
+
+/*        Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */
+
+	i__1 = *n - m1;
+	sgelqt3_(&m2, &i__1, &a[i1 + i1 * a_dim1], lda, &t[i1 + i1 * t_dim1], 
+		ldt, &iinfo);
+
+/*        Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 */
+
+	i__1 = m2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = m1;
+	    for (j = 1; j <= i__2; ++j) {
+		t[j + (i__ + m1) * t_dim1] = a[j + (i__ + m1) * a_dim1];
+	    }
+	}
+
+	strmm_("R", "U", "T", "U", &m1, &m2, &c_b7, &a[i1 + i1 * a_dim1], lda,
+		 &t[i1 * t_dim1 + 1], ldt);
+
+	i__1 = *n - *m;
+	sgemm_("N", "T", &m1, &m2, &i__1, &c_b7, &a[j1 * a_dim1 + 1], lda, &a[
+		i1 + j1 * a_dim1], lda, &c_b7, &t[i1 * t_dim1 + 1], ldt);
+
+	strmm_("L", "U", "N", "N", &m1, &m2, &c_b19, &t[t_offset], ldt, &t[i1 
+		* t_dim1 + 1], ldt);
+
+	strmm_("R", "U", "N", "N", &m1, &m2, &c_b7, &t[i1 + i1 * t_dim1], ldt,
+		 &t[i1 * t_dim1 + 1], ldt);
+
+
+
+/*        Y = (Y1,Y2); L = [ L1            0  ];  T = [T1 T3] */
+/*                         [ A(1:N1,J1:N)  L2 ]       [ 0 T2] */
+
+    }
+
+    return 0;
+
+/*     End of SGELQT3 */
+
+} /* sgelqt3_ */
+
diff --git a/lapack-netlib/SRC/sgels.c b/lapack-netlib/SRC/sgels.c
new file mode 100644
index 000000000..bb53bfac0
--- /dev/null
+++ b/lapack-netlib/SRC/sgels.c
@@ -0,0 +1,954 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static real c_b33 = 0.f;
+static integer c__0 = 0;
+
+/* > \brief <b> SGELS solves overdetermined or underdetermined systems for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGELS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgels.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgels.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgels.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */
+/*                         INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
+/*       REAL               A( LDA, * ), B( LDB, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGELS solves overdetermined or underdetermined real linear systems */
+/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */
+/* > factorization of A.  It is assumed that A has full rank. */
+/* > */
+/* > The following options are provided: */
+/* > */
+/* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A*X ||. */
+/* > */
+/* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
+/* >    an underdetermined system A * X = B. */
+/* > */
+/* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
+/* >    an underdetermined system A**T * X = B. */
+/* > */
+/* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A**T * X ||. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
+/* > matrix X. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N': the linear system involves A; */
+/* >          = 'T': the linear system involves A**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of */
+/* >          columns of the matrices B and X. NRHS >=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >            if M >= N, A is overwritten by details of its QR */
+/* >                       factorization as returned by SGEQRF; */
+/* >            if M <  N, A is overwritten by details of its LQ */
+/* >                       factorization as returned by SGELQF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the matrix B of right hand side vectors, stored */
+/* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
+/* >          if TRANS = 'T'. */
+/* >          On exit, if INFO = 0, B is overwritten by the solution */
+/* >          vectors, stored columnwise: */
+/* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
+/* >          squares solution vectors; the residual sum of squares for the */
+/* >          solution in each column is given by the sum of squares of */
+/* >          elements N+1 to M in that column; */
+/* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
+/* >          least squares solution vectors; the residual sum of squares */
+/* >          for the solution in each column is given by the sum of */
+/* >          squares of elements M+1 to N in that column. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */
+/* >          For optimal performance, */
+/* >          LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS )*NB ). */
+/* >          where MN = f2cmin(M,N) and NB is the optimum block size. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO =  i, the i-th diagonal element of the */
+/* >                triangular factor of A is zero, so that A does not have */
+/* >                full rank; the least squares solution could not be */
+/* >                computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgels_(char *trans, integer *m, integer *n, integer *
+	nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, 
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    real anrm, bnrm;
+    integer brow;
+    logical tpsd;
+    integer i__, j, iascl, ibscl;
+    extern logical lsame_(char *, char *);
+    integer wsize;
+    real rwork[1];
+    integer nb;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer mn;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer scllen;
+    real bignum;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), slascl_(char *, integer 
+	    *, integer *, real *, real *, integer *, integer *, real *, 
+	    integer *, integer *), sgeqrf_(integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), slaset_(char 
+	    *, integer *, integer *, real *, real *, real *, integer *);
+    real smlnum;
+    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *), strtrs_(char *, char *, 
+	    char *, integer *, integer *, real *, integer *, real *, integer *
+	    , integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    mn = f2cmin(*m,*n);
+    lquery = *lwork == -1;
+    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m);
+	if (*ldb < f2cmax(i__1,*n)) {
+	    *info = -8;
+	} else /* if(complicated condition) */ {
+/* Computing MAX */
+	    i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs);
+	    if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
+		*info = -10;
+	    }
+	}
+    }
+
+/*     Figure out optimal block size */
+
+    if (*info == 0 || *info == -10) {
+
+	tpsd = TRUE_;
+	if (lsame_(trans, "N")) {
+	    tpsd = FALSE_;
+	}
+
+	if (*m >= *n) {
+	    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    if (tpsd) {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMQR", "LN", m, nrhs, n, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMQR", "LT", m, nrhs, n, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    }
+	} else {
+	    nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    if (tpsd) {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMLQ", "LT", n, nrhs, m, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "SORMLQ", "LN", n, nrhs, m, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    }
+	}
+
+/* Computing MAX */
+	i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs) * nb;
+	wsize = f2cmax(i__1,i__2);
+	work[1] = (real) wsize;
+
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELS ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*nrhs) == 0) {
+	i__1 = f2cmax(*m,*n);
+	slaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = slamch_("S") / slamch_("P");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, rwork);
+    iascl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.f) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = f2cmax(*m,*n);
+	slaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
+	goto L50;
+    }
+
+    brow = *m;
+    if (tpsd) {
+	brow = *n;
+    }
+    bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, rwork);
+    ibscl = 0;
+    if (bnrm > 0.f && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 2;
+    }
+
+    if (*m >= *n) {
+
+/*        compute QR factorization of A */
+
+	i__1 = *lwork - mn;
+	sgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
+		;
+
+/*        workspace at least N, optimally N*NB */
+
+	if (! tpsd) {
+
+/*           Least-Squares Problem f2cmin || A * X - B || */
+
+/*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    sormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[
+		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+/*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */
+
+	    strtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset]
+		    , lda, &b[b_offset], ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+	    scllen = *n;
+
+	} else {
+
+/*           Underdetermined system of equations A**T * X = B */
+
+/*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */
+
+	    strtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], 
+		    lda, &b[b_offset], ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(N+1:M,1:NRHS) = ZERO */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = *n + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.f;
+/* L10: */
+		}
+/* L20: */
+	    }
+
+/*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    sormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, &
+		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+	    scllen = *m;
+
+	}
+
+    } else {
+
+/*        Compute LQ factorization of A */
+
+	i__1 = *lwork - mn;
+	sgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
+		;
+
+/*        workspace at least M, optimally M*NB. */
+
+	if (! tpsd) {
+
+/*           underdetermined system of equations A * X = B */
+
+/*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */
+
+	    strtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset]
+		    , lda, &b[b_offset], ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(M+1:N,1:NRHS) = 0 */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = *m + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.f;
+/* L30: */
+		}
+/* L40: */
+	    }
+
+/*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    sormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[
+		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+	    scllen = *n;
+
+	} else {
+
+/*           overdetermined system f2cmin || A**T * X - B || */
+
+/*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    sormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, &
+		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+/*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */
+
+	    strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
+		    lda, &b[b_offset], ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+	    scllen = *m;
+
+	}
+
+    }
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (iascl == 2) {
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+    if (ibscl == 1) {
+	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (ibscl == 2) {
+	slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+
+L50:
+    work[1] = (real) wsize;
+
+    return 0;
+
+/*     End of SGELS */
+
+} /* sgels_ */
+
diff --git a/lapack-netlib/SRC/sgelss.c b/lapack-netlib/SRC/sgelss.c
new file mode 100644
index 000000000..6da9b3348
--- /dev/null
+++ b/lapack-netlib/SRC/sgelss.c
@@ -0,0 +1,1308 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__6 = 6;
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static integer c__0 = 0;
+static real c_b50 = 0.f;
+static real c_b83 = 1.f;
+
+/* > \brief <b> SGELSS solves overdetermined or underdetermined systems for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGELSS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelss.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelss.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelss.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */
+/*                          WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */
+/*       REAL               RCOND */
+/*       REAL               A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGELSS computes the minimum norm solution to a real linear least */
+/* > squares problem: */
+/* > */
+/* > Minimize 2-norm(| b - A*x |). */
+/* > */
+/* > using the singular value decomposition (SVD) of A. A is an M-by-N */
+/* > matrix which may be rank-deficient. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */
+/* > X. */
+/* > */
+/* > The effective rank of A is determined by treating as zero those */
+/* > singular values which are less than RCOND times the largest singular */
+/* > value. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X. NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the first f2cmin(m,n) rows of A are overwritten with */
+/* >          its right singular vectors, stored rowwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the M-by-NRHS right hand side matrix B. */
+/* >          On exit, B is overwritten by the N-by-NRHS solution */
+/* >          matrix X.  If m >= n and RANK = n, the residual */
+/* >          sum-of-squares for the solution in the i-th column is given */
+/* >          by the sum of squares of elements n+1:m in that column. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,f2cmax(M,N)). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is REAL array, dimension (f2cmin(M,N)) */
+/* >          The singular values of A in decreasing order. */
+/* >          The condition number of A in the 2-norm = S(1)/S(f2cmin(m,n)). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >          RCOND is used to determine the effective rank of A. */
+/* >          Singular values S(i) <= RCOND*S(1) are treated as zero. */
+/* >          If RCOND < 0, machine precision is used instead. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RANK */
+/* > \verbatim */
+/* >          RANK is INTEGER */
+/* >          The effective rank of A, i.e., the number of singular values */
+/* >          which are greater than RCOND*S(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= 1, and also: */
+/* >          LWORK >= 3*f2cmin(M,N) + f2cmax( 2*f2cmin(M,N), f2cmax(M,N), NRHS ) */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  the algorithm for computing the SVD failed to converge; */
+/* >                if INFO = i, i off-diagonal elements of an intermediate */
+/* >                bidiagonal form did not converge to zero. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgelss_(integer *m, integer *n, integer *nrhs, real *a, 
+	integer *lda, real *b, integer *ldb, real *s, real *rcond, integer *
+	rank, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+    real r__1;
+
+    /* Local variables */
+    real anrm, bnrm;
+    integer itau, lwork_sgebrd__, lwork_sgeqrf__, i__, lwork_sorgbr__, 
+	    lwork_sormbr__, lwork_sormlq__, iascl, ibscl, lwork_sormqr__, 
+	    chunk;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *);
+    real sfmin;
+    integer minmn, maxmn;
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    integer itaup, itauq;
+    extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *);
+    integer mnthr, iwork;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    integer bl, ie, il;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer mm, bdspac;
+    extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, integer *, integer *);
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), slascl_(char *, integer 
+	    *, integer *, real *, real *, integer *, integer *, real *, 
+	    integer *, integer *), sgeqrf_(integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), slacpy_(char 
+	    *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
+	    real *, integer *), sbdsqr_(char *, integer *, integer *, 
+	    integer *, integer *, real *, real *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), sorgbr_(
+	    char *, integer *, integer *, integer *, real *, integer *, real *
+	    , real *, integer *, integer *);
+    integer ldwork;
+    extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , real *, integer *, integer *);
+    integer minwrk, maxwrk;
+    real smlnum;
+    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    real dum[1], eps, thr;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --s;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    maxmn = f2cmax(*m,*n);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,maxmn)) {
+	*info = -7;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	if (minmn > 0) {
+	    mm = *m;
+	    mnthr = ilaenv_(&c__6, "SGELSS", " ", m, n, nrhs, &c_n1, (ftnlen)
+		    6, (ftnlen)1);
+	    if (*m >= *n && *m >= mnthr) {
+
+/*              Path 1a - overdetermined, with many more rows than */
+/*                        columns */
+
+/*              Compute space needed for SGEQRF */
+		sgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
+		lwork_sgeqrf__ = dum[0];
+/*              Compute space needed for SORMQR */
+		sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, dum, &b[
+			b_offset], ldb, dum, &c_n1, info);
+		lwork_sormqr__ = dum[0];
+		mm = *n;
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_sgeqrf__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_sormqr__;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    if (*m >= *n) {
+
+/*              Path 1 - overdetermined or exactly determined */
+
+/*              Compute workspace needed for SBDSQR */
+
+/* Computing MAX */
+		i__1 = 1, i__2 = *n * 5;
+		bdspac = f2cmax(i__1,i__2);
+/*              Compute space needed for SGEBRD */
+		sgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, 
+			&c_n1, info);
+		lwork_sgebrd__ = dum[0];
+/*              Compute space needed for SORMBR */
+		sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, dum, &
+			b[b_offset], ldb, dum, &c_n1, info);
+		lwork_sormbr__ = dum[0];
+/*              Compute space needed for SORGBR */
+		sorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, 
+			info);
+		lwork_sorgbr__ = dum[0];
+/*              Compute total workspace needed */
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3 + lwork_sgebrd__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3 + lwork_sormbr__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3 + lwork_sorgbr__;
+		maxwrk = f2cmax(i__1,i__2);
+		maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * *nrhs;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = f2cmax(i__1,
+			i__2);
+		minwrk = f2cmax(i__1,bdspac);
+		maxwrk = f2cmax(minwrk,maxwrk);
+	    }
+	    if (*n > *m) {
+
+/*              Compute workspace needed for SBDSQR */
+
+/* Computing MAX */
+		i__1 = 1, i__2 = *m * 5;
+		bdspac = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = f2cmax(i__1,
+			i__2);
+		minwrk = f2cmax(i__1,bdspac);
+		if (*n >= mnthr) {
+
+/*                 Path 2a - underdetermined, with many more columns */
+/*                 than rows */
+
+/*                 Compute space needed for SGEBRD */
+		    sgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, 
+			    dum, &c_n1, info);
+		    lwork_sgebrd__ = dum[0];
+/*                 Compute space needed for SORMBR */
+		    sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, dum,
+			     &b[b_offset], ldb, dum, &c_n1, info);
+		    lwork_sormbr__ = dum[0];
+/*                 Compute space needed for SORGBR */
+		    sorgbr_("P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, 
+			    info);
+		    lwork_sorgbr__ = dum[0];
+/*                 Compute space needed for SORMLQ */
+		    sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, dum, &b[
+			    b_offset], ldb, dum, &c_n1, info);
+		    lwork_sormlq__ = dum[0];
+/*                 Compute total workspace needed */
+		    maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + 
+			    lwork_sgebrd__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + 
+			    lwork_sormbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + 
+			    lwork_sorgbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    if (*nrhs > 1) {
+/* Computing MAX */
+			i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+			maxwrk = f2cmax(i__1,i__2);
+		    } else {
+/* Computing MAX */
+			i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
+			maxwrk = f2cmax(i__1,i__2);
+		    }
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m + lwork_sormlq__;
+		    maxwrk = f2cmax(i__1,i__2);
+		} else {
+
+/*                 Path 2 - underdetermined */
+
+/*                 Compute space needed for SGEBRD */
+		    sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, 
+			    dum, &c_n1, info);
+		    lwork_sgebrd__ = dum[0];
+/*                 Compute space needed for SORMBR */
+		    sormbr_("Q", "L", "T", m, nrhs, m, &a[a_offset], lda, dum,
+			     &b[b_offset], ldb, dum, &c_n1, info);
+		    lwork_sormbr__ = dum[0];
+/*                 Compute space needed for SORGBR */
+		    sorgbr_("P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, 
+			    info);
+		    lwork_sorgbr__ = dum[0];
+		    maxwrk = *m * 3 + lwork_sgebrd__;
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * 3 + lwork_sormbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * 3 + lwork_sorgbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+		    maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *n * *nrhs;
+		    maxwrk = f2cmax(i__1,i__2);
+		}
+	    }
+	    maxwrk = f2cmax(minwrk,maxwrk);
+	}
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELSS", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    eps = slamch_("P");
+    sfmin = slamch_("S");
+    smlnum = sfmin / eps;
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.f) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = f2cmax(*m,*n);
+	slaset_("F", &i__1, nrhs, &c_b50, &c_b50, &b[b_offset], ldb);
+	slaset_("F", &minmn, &c__1, &c_b50, &c_b50, &s[1], &minmn);
+	*rank = 0;
+	goto L70;
+    }
+
+/*     Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0.f && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     Overdetermined case */
+
+    if (*m >= *n) {
+
+/*        Path 1 - overdetermined or exactly determined */
+
+	mm = *m;
+	if (*m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns */
+
+	    mm = *n;
+	    itau = 1;
+	    iwork = itau + *n;
+
+/*           Compute A=Q*R */
+/*           (Workspace: need 2*N, prefer N+N*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1,
+		     info);
+
+/*           Multiply B by transpose(Q) */
+/*           (Workspace: need N+NRHS, prefer N+NRHS*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[iwork], &i__1, info);
+
+/*           Zero out below R */
+
+	    if (*n > 1) {
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		slaset_("L", &i__1, &i__2, &c_b50, &c_b50, &a[a_dim1 + 2], 
+			lda);
+	    }
+	}
+
+	ie = 1;
+	itauq = ie + *n;
+	itaup = itauq + *n;
+	iwork = itaup + *n;
+
+/*        Bidiagonalize R in A */
+/*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */
+
+	i__1 = *lwork - iwork + 1;
+	sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		work[itaup], &work[iwork], &i__1, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors of R */
+/*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */
+
+	i__1 = *lwork - iwork + 1;
+	sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], 
+		&b[b_offset], ldb, &work[iwork], &i__1, info);
+
+/*        Generate right bidiagonalizing vectors of R in A */
+/*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwork + 1;
+	sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &
+		i__1, info);
+	iwork = ie + *n;
+
+/*        Perform bidiagonal QR iteration */
+/*          multiply B by transpose of left singular vectors */
+/*          compute right singular vectors in A */
+/*        (Workspace: need BDSPAC) */
+
+	sbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, 
+		dum, &c__1, &b[b_offset], ldb, &work[iwork], info);
+	if (*info != 0) {
+	    goto L70;
+	}
+
+/*        Multiply B by reciprocals of singular values */
+
+/* Computing MAX */
+	r__1 = *rcond * s[1];
+	thr = f2cmax(r__1,sfmin);
+	if (*rcond < 0.f) {
+/* Computing MAX */
+	    r__1 = eps * s[1];
+	    thr = f2cmax(r__1,sfmin);
+	}
+	*rank = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (s[i__] > thr) {
+		srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+		++(*rank);
+	    } else {
+		slaset_("F", &c__1, nrhs, &c_b50, &c_b50, &b[i__ + b_dim1], 
+			ldb);
+	    }
+/* L10: */
+	}
+
+/*        Multiply B by right singular vectors */
+/*        (Workspace: need N, prefer N*NRHS) */
+
+	if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
+	    sgemm_("T", "N", n, nrhs, n, &c_b83, &a[a_offset], lda, &b[
+		    b_offset], ldb, &c_b50, &work[1], ldb);
+	    slacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb)
+		    ;
+	} else if (*nrhs > 1) {
+	    chunk = *lwork / *n;
+	    i__1 = *nrhs;
+	    i__2 = chunk;
+	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+		i__3 = *nrhs - i__ + 1;
+		bl = f2cmin(i__3,chunk);
+		sgemm_("T", "N", n, &bl, n, &c_b83, &a[a_offset], lda, &b[i__ 
+			* b_dim1 + 1], ldb, &c_b50, &work[1], n);
+		slacpy_("G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb);
+/* L20: */
+	    }
+	} else {
+	    sgemv_("T", n, n, &c_b83, &a[a_offset], lda, &b[b_offset], &c__1, 
+		    &c_b50, &work[1], &c__1);
+	    scopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
+	}
+
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__2 = *m, i__1 = (*m << 1) - 4, i__2 = f2cmax(i__2,i__1), i__2 = f2cmax(
+		i__2,*nrhs), i__1 = *n - *m * 3;
+	if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + f2cmax(i__2,i__1)) {
+
+/*        Path 2a - underdetermined, with many more columns than rows */
+/*        and sufficient workspace for an efficient algorithm */
+
+	    ldwork = *m;
+/* Computing MAX */
+/* Computing MAX */
+	    i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = 
+		    f2cmax(i__3,*nrhs), i__4 = *n - *m * 3;
+	    i__2 = (*m << 2) + *m * *lda + f2cmax(i__3,i__4), i__1 = *m * *lda + 
+		    *m + *m * *nrhs;
+	    if (*lwork >= f2cmax(i__2,i__1)) {
+		ldwork = *lda;
+	    }
+	    itau = 1;
+	    iwork = *m + 1;
+
+/*        Compute A=L*Q */
+/*        (Workspace: need 2*M, prefer M+M*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2,
+		     info);
+	    il = iwork;
+
+/*        Copy L to WORK(IL), zeroing out above it */
+
+	    slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
+	    i__2 = *m - 1;
+	    i__1 = *m - 1;
+	    slaset_("U", &i__2, &i__1, &c_b50, &c_b50, &work[il + ldwork], &
+		    ldwork);
+	    ie = il + ldwork * *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    iwork = itaup + *m;
+
+/*        Bidiagonalize L in WORK(IL) */
+/*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], 
+		    &work[itaup], &work[iwork], &i__2, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors of L */
+/*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
+		    itauq], &b[b_offset], ldb, &work[iwork], &i__2, info);
+
+/*        Generate right bidiagonalizing vectors of R in WORK(IL) */
+/*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    sorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[
+		    iwork], &i__2, info);
+	    iwork = ie + *m;
+
+/*        Perform bidiagonal QR iteration, */
+/*           computing right singular vectors of L in WORK(IL) and */
+/*           multiplying B by transpose of left singular vectors */
+/*        (Workspace: need M*M+M+BDSPAC) */
+
+	    sbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &
+		    ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork]
+		    , info);
+	    if (*info != 0) {
+		goto L70;
+	    }
+
+/*        Multiply B by reciprocals of singular values */
+
+/* Computing MAX */
+	    r__1 = *rcond * s[1];
+	    thr = f2cmax(r__1,sfmin);
+	    if (*rcond < 0.f) {
+/* Computing MAX */
+		r__1 = eps * s[1];
+		thr = f2cmax(r__1,sfmin);
+	    }
+	    *rank = 0;
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		if (s[i__] > thr) {
+		    srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+		    ++(*rank);
+		} else {
+		    slaset_("F", &c__1, nrhs, &c_b50, &c_b50, &b[i__ + b_dim1]
+			    , ldb);
+		}
+/* L30: */
+	    }
+	    iwork = ie;
+
+/*        Multiply B by right singular vectors of L in WORK(IL) */
+/*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */
+
+	    if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
+		sgemm_("T", "N", m, nrhs, m, &c_b83, &work[il], &ldwork, &b[
+			b_offset], ldb, &c_b50, &work[iwork], ldb);
+		slacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb);
+	    } else if (*nrhs > 1) {
+		chunk = (*lwork - iwork + 1) / *m;
+		i__2 = *nrhs;
+		i__1 = chunk;
+		for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += 
+			i__1) {
+/* Computing MIN */
+		    i__3 = *nrhs - i__ + 1;
+		    bl = f2cmin(i__3,chunk);
+		    sgemm_("T", "N", m, &bl, m, &c_b83, &work[il], &ldwork, &
+			    b[i__ * b_dim1 + 1], ldb, &c_b50, &work[iwork], m);
+		    slacpy_("G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1]
+			    , ldb);
+/* L40: */
+		}
+	    } else {
+		sgemv_("T", m, m, &c_b83, &work[il], &ldwork, &b[b_dim1 + 1], 
+			&c__1, &c_b50, &work[iwork], &c__1);
+		scopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1);
+	    }
+
+/*        Zero out below first M rows of B */
+
+	    i__1 = *n - *m;
+	    slaset_("F", &i__1, nrhs, &c_b50, &c_b50, &b[*m + 1 + b_dim1], 
+		    ldb);
+	    iwork = itau + *m;
+
+/*        Multiply transpose(Q) by B */
+/*        (Workspace: need M+NRHS, prefer M+NRHS*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[iwork], &i__1, info);
+
+	} else {
+
+/*        Path 2 - remaining underdetermined cases */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    iwork = itaup + *m;
+
+/*        Bidiagonalize A */
+/*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[iwork], &i__1, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors */
+/*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
+		    , &b[b_offset], ldb, &work[iwork], &i__1, info);
+
+/*        Generate right bidiagonalizing vectors in A */
+/*        (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
+		    iwork], &i__1, info);
+	    iwork = ie + *m;
+
+/*        Perform bidiagonal QR iteration, */
+/*           computing right singular vectors of A in A and */
+/*           multiplying B by transpose of left singular vectors */
+/*        (Workspace: need BDSPAC) */
+
+	    sbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], 
+		    lda, dum, &c__1, &b[b_offset], ldb, &work[iwork], info);
+	    if (*info != 0) {
+		goto L70;
+	    }
+
+/*        Multiply B by reciprocals of singular values */
+
+/* Computing MAX */
+	    r__1 = *rcond * s[1];
+	    thr = f2cmax(r__1,sfmin);
+	    if (*rcond < 0.f) {
+/* Computing MAX */
+		r__1 = eps * s[1];
+		thr = f2cmax(r__1,sfmin);
+	    }
+	    *rank = 0;
+	    i__1 = *m;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (s[i__] > thr) {
+		    srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+		    ++(*rank);
+		} else {
+		    slaset_("F", &c__1, nrhs, &c_b50, &c_b50, &b[i__ + b_dim1]
+			    , ldb);
+		}
+/* L50: */
+	    }
+
+/*        Multiply B by right singular vectors of A */
+/*        (Workspace: need N, prefer N*NRHS) */
+
+	    if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
+		sgemm_("T", "N", n, nrhs, m, &c_b83, &a[a_offset], lda, &b[
+			b_offset], ldb, &c_b50, &work[1], ldb);
+		slacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb);
+	    } else if (*nrhs > 1) {
+		chunk = *lwork / *n;
+		i__1 = *nrhs;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
+			i__2) {
+/* Computing MIN */
+		    i__3 = *nrhs - i__ + 1;
+		    bl = f2cmin(i__3,chunk);
+		    sgemm_("T", "N", n, &bl, m, &c_b83, &a[a_offset], lda, &b[
+			    i__ * b_dim1 + 1], ldb, &c_b50, &work[1], n);
+		    slacpy_("F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], 
+			    ldb);
+/* L60: */
+		}
+	    } else {
+		sgemv_("T", m, n, &c_b83, &a[a_offset], lda, &b[b_offset], &
+			c__1, &c_b50, &work[1], &c__1);
+		scopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
+	    }
+	}
+    }
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    } else if (iascl == 2) {
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    }
+    if (ibscl == 1) {
+	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+
+L70:
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGELSS */
+
+} /* sgelss_ */
+
diff --git a/lapack-netlib/SRC/sgelsy.c b/lapack-netlib/SRC/sgelsy.c
new file mode 100644
index 000000000..def845983
--- /dev/null
+++ b/lapack-netlib/SRC/sgelsy.c
@@ -0,0 +1,939 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__0 = 0;
+static real c_b31 = 0.f;
+static integer c__2 = 2;
+static real c_b54 = 1.f;
+
+/* > \brief <b> SGELSY solves overdetermined or underdetermined systems for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGELSY + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelsy.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelsy.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelsy.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, */
+/*                          WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */
+/*       REAL               RCOND */
+/*       INTEGER            JPVT( * ) */
+/*       REAL               A( LDA, * ), B( LDB, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGELSY computes the minimum-norm solution to a real linear least */
+/* > squares problem: */
+/* >     minimize || A * X - B || */
+/* > using a complete orthogonal factorization of A.  A is an M-by-N */
+/* > matrix which may be rank-deficient. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
+/* > matrix X. */
+/* > */
+/* > The routine first computes a QR factorization with column pivoting: */
+/* >     A * P = Q * [ R11 R12 ] */
+/* >                 [  0  R22 ] */
+/* > with R11 defined as the largest leading submatrix whose estimated */
+/* > condition number is less than 1/RCOND.  The order of R11, RANK, */
+/* > is the effective rank of A. */
+/* > */
+/* > Then, R22 is considered to be negligible, and R12 is annihilated */
+/* > by orthogonal transformations from the right, arriving at the */
+/* > complete orthogonal factorization: */
+/* >    A * P = Q * [ T11 0 ] * Z */
+/* >                [  0  0 ] */
+/* > The minimum-norm solution is then */
+/* >    X = P * Z**T [ inv(T11)*Q1**T*B ] */
+/* >                 [        0         ] */
+/* > where Q1 consists of the first RANK columns of Q. */
+/* > */
+/* > This routine is basically identical to the original xGELSX except */
+/* > three differences: */
+/* >   o The call to the subroutine xGEQPF has been substituted by the */
+/* >     the call to the subroutine xGEQP3. This subroutine is a Blas-3 */
+/* >     version of the QR factorization with column pivoting. */
+/* >   o Matrix B (the right hand side) is updated with Blas-3. */
+/* >   o The permutation of matrix B (the right hand side) is faster and */
+/* >     more simple. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of */
+/* >          columns of matrices B and X. NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, A has been overwritten by details of its */
+/* >          complete orthogonal factorization. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the M-by-NRHS right hand side matrix B. */
+/* >          On exit, the N-by-NRHS solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] JPVT */
+/* > \verbatim */
+/* >          JPVT is INTEGER array, dimension (N) */
+/* >          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */
+/* >          to the front of AP, otherwise column i is a free column. */
+/* >          On exit, if JPVT(i) = k, then the i-th column of AP */
+/* >          was the k-th column of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >          RCOND is used to determine the effective rank of A, which */
+/* >          is defined as the order of the largest leading triangular */
+/* >          submatrix R11 in the QR factorization with pivoting of A, */
+/* >          whose estimated condition number < 1/RCOND. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RANK */
+/* > \verbatim */
+/* >          RANK is INTEGER */
+/* >          The effective rank of A, i.e., the order of the submatrix */
+/* >          R11.  This is the same as the order of the submatrix T11 */
+/* >          in the complete orthogonal factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          The unblocked strategy requires that: */
+/* >             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */
+/* >          where MN = f2cmin( M, N ). */
+/* >          The block algorithm requires that: */
+/* >             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */
+/* >          where NB is an upper bound on the blocksize returned */
+/* >          by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, */
+/* >          and SORMRZ. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: If INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEsolve */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n */
+/* >    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
+/* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgelsy_(integer *m, integer *n, integer *nrhs, real *a, 
+	integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, 
+	integer *rank, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    real anrm, bnrm, smin, smax;
+    integer i__, j, iascl, ibscl, ismin, ismax;
+    real c1, c2;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    real wsize, s1, s2;
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), slaic1_(integer *, integer *, 
+	    real *, real *, real *, real *, real *, real *, real *), sgeqp3_(
+	    integer *, integer *, real *, integer *, integer *, real *, real *
+	    , integer *, integer *);
+    integer nb;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer mn;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
+	    real *, integer *);
+    integer lwkmin, nb1, nb2, nb3, nb4;
+    real sminpr, smaxpr, smlnum;
+    integer lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *), sormrz_(char *, char *, 
+	    integer *, integer *, integer *, integer *, real *, integer *, 
+	    real *, real *, integer *, real *, integer *, integer *), stzrzf_(integer *, integer *, real *, integer *, real *, 
+	    real *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --jpvt;
+    --work;
+
+    /* Function Body */
+    mn = f2cmin(*m,*n);
+    ismin = mn + 1;
+    ismax = (mn << 1) + 1;
+
+/*     Test the input arguments. */
+
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m);
+	if (*ldb < f2cmax(i__1,*n)) {
+	    *info = -7;
+	}
+    }
+
+/*     Figure out optimal block size */
+
+    if (*info == 0) {
+	if (mn == 0 || *nrhs == 0) {
+	    lwkmin = 1;
+	    lwkopt = 1;
+	} else {
+	    nb1 = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb2 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb3 = ilaenv_(&c__1, "SORMQR", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    nb4 = ilaenv_(&c__1, "SORMRQ", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+/* Computing MAX */
+	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
+	    nb = f2cmax(i__1,nb4);
+/* Computing MAX */
+	    i__1 = mn << 1, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = mn + 
+		    *nrhs;
+	    lwkmin = mn + f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = f2cmax(
+		    i__1,i__2), i__2 = (mn << 1) + nb * *nrhs;
+	    lwkopt = f2cmax(i__1,i__2);
+	}
+	work[1] = (real) lwkopt;
+
+	if (*lwork < lwkmin && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELSY", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (mn == 0 || *nrhs == 0) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = slamch_("S") / slamch_("P");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Scale A, B if f2cmax entries outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.f) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = f2cmax(*m,*n);
+	slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
+	*rank = 0;
+	goto L70;
+    }
+
+    bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0.f && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     Compute QR factorization with column pivoting of A: */
+/*        A * P = Q * R */
+
+    i__1 = *lwork - mn;
+    sgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1,
+	     info);
+    wsize = mn + work[mn + 1];
+
+/*     workspace: MN+2*N+NB*(N+1). */
+/*     Details of Householder rotations stored in WORK(1:MN). */
+
+/*     Determine RANK using incremental condition estimation */
+
+    work[ismin] = 1.f;
+    work[ismax] = 1.f;
+    smax = (r__1 = a[a_dim1 + 1], abs(r__1));
+    smin = smax;
+    if ((r__1 = a[a_dim1 + 1], abs(r__1)) == 0.f) {
+	*rank = 0;
+	i__1 = f2cmax(*m,*n);
+	slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
+	goto L70;
+    } else {
+	*rank = 1;
+    }
+
+L10:
+    if (*rank < mn) {
+	i__ = *rank + 1;
+	slaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[
+		i__ + i__ * a_dim1], &sminpr, &s1, &c1);
+	slaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[
+		i__ + i__ * a_dim1], &smaxpr, &s2, &c2);
+
+	if (smaxpr * *rcond <= sminpr) {
+	    i__1 = *rank;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1];
+		work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1];
+/* L20: */
+	    }
+	    work[ismin + *rank] = c1;
+	    work[ismax + *rank] = c2;
+	    smin = sminpr;
+	    smax = smaxpr;
+	    ++(*rank);
+	    goto L10;
+	}
+    }
+
+/*     workspace: 3*MN. */
+
+/*     Logically partition R = [ R11 R12 ] */
+/*                             [  0  R22 ] */
+/*     where R11 = R(1:RANK,1:RANK) */
+
+/*     [R11,R12] = [ T11, 0 ] * Y */
+
+    if (*rank < *n) {
+	i__1 = *lwork - (mn << 1);
+	stzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + 
+		1], &i__1, info);
+    }
+
+/*     workspace: 2*MN. */
+/*     Details of Householder rotations stored in WORK(MN+1:2*MN) */
+
+/*     B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
+
+    i__1 = *lwork - (mn << 1);
+    sormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], &
+	    b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info);
+/* Computing MAX */
+    r__1 = wsize, r__2 = (mn << 1) + work[(mn << 1) + 1];
+    wsize = f2cmax(r__1,r__2);
+
+/*     workspace: 2*MN+NB*NRHS. */
+
+/*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */
+
+    strsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, &
+	    a[a_offset], lda, &b[b_offset], ldb);
+
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = *rank + 1; i__ <= i__2; ++i__) {
+	    b[i__ + j * b_dim1] = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+
+/*     B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) */
+
+    if (*rank < *n) {
+	i__1 = *n - *rank;
+	i__2 = *lwork - (mn << 1);
+	sormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, 
+		&work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2,
+		 info);
+    }
+
+/*     workspace: 2*MN+NRHS. */
+
+/*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */
+
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[jpvt[i__]] = b[i__ + j * b_dim1];
+/* L50: */
+	}
+	scopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1);
+/* L60: */
+    }
+
+/*     workspace: N. */
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], 
+		lda, info);
+    } else if (iascl == 2) {
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], 
+		lda, info);
+    }
+    if (ibscl == 1) {
+	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+
+L70:
+    work[1] = (real) lwkopt;
+
+    return 0;
+
+/*     End of SGELSY */
+
+} /* sgelsy_ */
+
diff --git a/lapack-netlib/SRC/sgemlq.c b/lapack-netlib/SRC/sgemlq.c
new file mode 100644
index 000000000..6b6b1853b
--- /dev/null
+++ b/lapack-netlib/SRC/sgemlq.c
@@ -0,0 +1,681 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEMLQ */
+
+/*  Definition: */
+/*  =========== */
+
+/*      SUBROUTINE SGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, */
+/*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */
+
+
+/*      CHARACTER          SIDE, TRANS */
+/*      INTEGER            INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */
+/*      REAL               A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) */
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >     SGEMLQ overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                    SIDE = 'L'     SIDE = 'R' */
+/* >    TRANS = 'N':      Q * C          C * Q */
+/* >    TRANS = 'T':      Q**T * C       C * Q**T */
+/* >    where Q is a real orthogonal matrix defined as the product */
+/* >    of blocked elementary reflectors computed by short wide LQ */
+/* >    factorization (SGELQ) */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'T':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension */
+/* >                               (LDA,M) if SIDE = 'L', */
+/* >                               (LDA,N) if SIDE = 'R' */
+/* >          Part of the data structure to represent Q as returned by DGELQ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,K). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (MAX(5,TSIZE)). */
+/* >          Part of the data structure to represent Q as returned by SGELQ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          The dimension of the array T. TSIZE >= 5. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >         (workspace) REAL array, dimension (MAX(1,LWORK)) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1, then a workspace query is assumed. The routine */
+/* >          only calculates the size of the WORK array, returns this */
+/* >          value as WORK(1), and no error message related to WORK */
+/* >          is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           SLASWLQ or SGELQT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, SGELQ will use either */
+/* >  SLASWLQ (if the matrix is wide-and-short) or SGELQT to compute */
+/* >  the LQ factorization. */
+/* >  This version of SGEMLQ will use either SLAMSWLQ or SGEMLQT to */
+/* >  multiply matrix Q by another matrix. */
+/* >  Further Details in SLAMSWLQ or SGEMLQT. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgemlq_(char *side, char *trans, integer *m, integer *n, 
+	integer *k, real *a, integer *lda, real *t, integer *tsize, real *c__,
+	 integer *ldc, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1;
+
+    /* Local variables */
+    logical left, tran;
+    extern /* Subroutine */ int slamswlq_(char *, char *, integer *, integer *
+	    , integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *, integer *);
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer mb, nb, mn, lw, nblcks;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical notran, lquery;
+    extern /* Subroutine */ int sgemlqt_(char *, char *, integer *, integer *,
+	     integer *, integer *, real *, integer *, real *, integer *, real 
+	    *, integer *, real *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/* ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    lquery = *lwork == -1;
+    notran = lsame_(trans, "N");
+    tran = lsame_(trans, "T");
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+
+    mb = (integer) t[2];
+    nb = (integer) t[3];
+    if (left) {
+	lw = *n * mb;
+	mn = *m;
+    } else {
+	lw = *m * mb;
+	mn = *n;
+    }
+
+    if (nb > *k && mn > *k) {
+	if ((mn - *k) % (nb - *k) == 0) {
+	    nblcks = (mn - *k) / (nb - *k);
+	} else {
+	    nblcks = (mn - *k) / (nb - *k) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+    *info = 0;
+    if (! left && ! right) {
+	*info = -1;
+    } else if (! tran && ! notran) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*k < 0 || *k > mn) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*k)) {
+	*info = -7;
+    } else if (*tsize < 5) {
+	*info = -9;
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -11;
+    } else if (*lwork < f2cmax(1,lw) && ! lquery) {
+	*info = -13;
+    }
+
+    if (*info == 0) {
+	work[1] = (real) lw;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEMLQ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*k) == 0) {
+	return 0;
+    }
+
+/* Computing MAX */
+    i__1 = f2cmax(*m,*n);
+    if (left && *m <= *k || right && *n <= *k || nb <= *k || nb >= f2cmax(i__1,*
+	    k)) {
+	sgemlqt_(side, trans, m, n, k, &mb, &a[a_offset], lda, &t[6], &mb, &
+		c__[c_offset], ldc, &work[1], info);
+    } else {
+	slamswlq_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
+		mb, &c__[c_offset], ldc, &work[1], lwork, info);
+    }
+
+    work[1] = (real) lw;
+
+    return 0;
+
+/*     End of SGEMLQ */
+
+} /* sgemlq_ */
+
diff --git a/lapack-netlib/SRC/sgemlqt.c b/lapack-netlib/SRC/sgemlqt.c
new file mode 100644
index 000000000..6b2ae2945
--- /dev/null
+++ b/lapack-netlib/SRC/sgemlqt.c
@@ -0,0 +1,686 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEMLQT */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, */
+/*                          C, LDC, WORK, INFO ) */
+
+/*       CHARACTER SIDE, TRANS */
+/*       INTEGER   INFO, K, LDV, LDC, M, N, MB, LDT */
+/*       REAL      V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEMLQT overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                 SIDE = 'L'     SIDE = 'R' */
+/* > TRANS = 'N':      Q C            C Q */
+/* > TRANS = 'T':   Q**T C            C Q**T */
+/* > */
+/* > where Q is a real orthogonal matrix defined as the product of K */
+/* > elementary reflectors: */
+/* > */
+/* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
+/* > */
+/* > generated using the compact WY representation as returned by DGELQT. */
+/* > */
+/* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'C':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix C. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MB */
+/* > \verbatim */
+/* >          MB is INTEGER */
+/* >          The block size used for the storage of T.  K >= MB >= 1. */
+/* >          This must be the same value of MB used to generate T */
+/* >          in DGELQT. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension */
+/* >                               (LDV,M) if SIDE = 'L', */
+/* >                               (LDV,N) if SIDE = 'R' */
+/* >          The i-th row must contain the vector which defines the */
+/* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
+/* >          DGELQT in the first K rows of its array argument A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. LDV >= f2cmax(1,K). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,K) */
+/* >          The upper triangular factors of the block reflectors */
+/* >          as returned by DGELQT, stored as a MB-by-K matrix. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= MB. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array. The dimension of */
+/* >          WORK is N*MB if SIDE = 'L', or  M*MB if SIDE = 'R'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgemlqt_(char *side, char *trans, integer *m, integer *n,
+	 integer *k, integer *mb, real *v, integer *ldv, real *t, integer *
+	ldt, real *c__, integer *ldc, real *work, integer *info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, 
+	    i__3, i__4;
+
+    /* Local variables */
+    logical left, tran;
+    integer i__;
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer ib, kf;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
+    logical notran;
+    integer ldwork;
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+
+
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+    tran = lsame_(trans, "T");
+    notran = lsame_(trans, "N");
+
+    if (left) {
+	ldwork = f2cmax(1,*n);
+    } else if (right) {
+	ldwork = f2cmax(1,*m);
+    }
+    if (! left && ! right) {
+	*info = -1;
+    } else if (! tran && ! notran) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*k < 0) {
+	*info = -5;
+    } else if (*mb < 1 || *mb > *k && *k > 0) {
+	*info = -6;
+    } else if (*ldv < f2cmax(1,*k)) {
+	*info = -8;
+    } else if (*ldt < *mb) {
+	*info = -10;
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -12;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEMLQT", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+
+    if (*m == 0 || *n == 0 || *k == 0) {
+	return 0;
+    }
+
+    if (left && notran) {
+
+	i__1 = *k;
+	i__2 = *mb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = *mb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4);
+	    i__3 = *m - i__ + 1;
+	    slarfb_("L", "T", "F", "R", &i__3, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork);
+	}
+
+    } else if (right && tran) {
+
+	i__2 = *k;
+	i__1 = *mb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+	    i__3 = *mb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4);
+	    i__3 = *n - i__ + 1;
+	    slarfb_("R", "N", "F", "R", m, &i__3, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork);
+	}
+
+    } else if (left && tran) {
+
+	kf = (*k - 1) / *mb * *mb + 1;
+	i__1 = -(*mb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *mb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3);
+	    i__2 = *m - i__ + 1;
+	    slarfb_("L", "N", "F", "R", &i__2, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork);
+	}
+
+    } else if (right && notran) {
+
+	kf = (*k - 1) / *mb * *mb + 1;
+	i__1 = -(*mb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *mb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3);
+	    i__2 = *n - i__ + 1;
+	    slarfb_("R", "T", "F", "R", m, &i__2, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork);
+	}
+
+    }
+
+    return 0;
+
+/*     End of SGEMLQT */
+
+} /* sgemlqt_ */
+
diff --git a/lapack-netlib/SRC/sgemqr.c b/lapack-netlib/SRC/sgemqr.c
new file mode 100644
index 000000000..82248d4ec
--- /dev/null
+++ b/lapack-netlib/SRC/sgemqr.c
@@ -0,0 +1,683 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEMQR */
+
+/*  Definition: */
+/*  =========== */
+
+/*      SUBROUTINE SGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, */
+/*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */
+
+
+/*     CHARACTER         SIDE, TRANS */
+/*     INTEGER           INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */
+/*     REAL              A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) */
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEMQR overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                      SIDE = 'L'     SIDE = 'R' */
+/* >     TRANS = 'N':      Q * C          C * Q */
+/* >     TRANS = 'T':      Q**T * C       C * Q**T */
+/* > */
+/* > where Q is a real orthogonal matrix defined as the product */
+/* > of blocked elementary reflectors computed by tall skinny */
+/* > QR factorization (SGEQR) */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'T':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,K) */
+/* >          Part of the data structure to represent Q as returned by SGEQR. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. */
+/* >          If SIDE = 'L', LDA >= f2cmax(1,M); */
+/* >          if SIDE = 'R', LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (MAX(5,TSIZE)). */
+/* >          Part of the data structure to represent Q as returned by SGEQR. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          The dimension of the array T. TSIZE >= 5. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >         (workspace) REAL array, dimension (MAX(1,LWORK)) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1, then a workspace query is assumed. The routine */
+/* >          only calculates the size of the WORK array, returns this */
+/* >          value as WORK(1), and no error message related to WORK */
+/* >          is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           SLATSQR or SGEQRT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, SGEQR will use either */
+/* >  SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute */
+/* >  the QR factorization. */
+/* >  This version of SGEMQR will use either SLAMTSQR or SGEMQRT to */
+/* >  multiply matrix Q by another matrix. */
+/* >  Further Details in SLAMTSQR or SGEMQRT. */
+/* > */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgemqr_(char *side, char *trans, integer *m, integer *n, 
+	integer *k, real *a, integer *lda, real *t, integer *tsize, real *c__,
+	 integer *ldc, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1;
+
+    /* Local variables */
+    logical left, tran;
+    extern /* Subroutine */ int slamtsqr_(char *, char *, integer *, integer *
+	    , integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *, integer *);
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer mb, nb, mn, lw, nblcks;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical notran, lquery;
+    extern /* Subroutine */ int sgemqrt_(char *, char *, integer *, integer *,
+	     integer *, integer *, real *, integer *, real *, integer *, real 
+	    *, integer *, real *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/* ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    lquery = *lwork == -1;
+    notran = lsame_(trans, "N");
+    tran = lsame_(trans, "T");
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+
+    mb = (integer) t[2];
+    nb = (integer) t[3];
+    if (left) {
+	lw = *n * nb;
+	mn = *m;
+    } else {
+	lw = mb * nb;
+	mn = *n;
+    }
+
+    if (mb > *k && mn > *k) {
+	if ((mn - *k) % (mb - *k) == 0) {
+	    nblcks = (mn - *k) / (mb - *k);
+	} else {
+	    nblcks = (mn - *k) / (mb - *k) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+    *info = 0;
+    if (! left && ! right) {
+	*info = -1;
+    } else if (! tran && ! notran) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*k < 0 || *k > mn) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,mn)) {
+	*info = -7;
+    } else if (*tsize < 5) {
+	*info = -9;
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -11;
+    } else if (*lwork < f2cmax(1,lw) && ! lquery) {
+	*info = -13;
+    }
+
+    if (*info == 0) {
+	work[1] = (real) lw;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEMQR", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*k) == 0) {
+	return 0;
+    }
+
+/* Computing MAX */
+    i__1 = f2cmax(*m,*n);
+    if (left && *m <= *k || right && *n <= *k || mb <= *k || mb >= f2cmax(i__1,*
+	    k)) {
+	sgemqrt_(side, trans, m, n, k, &nb, &a[a_offset], lda, &t[6], &nb, &
+		c__[c_offset], ldc, &work[1], info);
+    } else {
+	slamtsqr_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
+		nb, &c__[c_offset], ldc, &work[1], lwork, info);
+    }
+
+    work[1] = (real) lw;
+
+    return 0;
+
+/*     End of SGEMQR */
+
+} /* sgemqr_ */
+
diff --git a/lapack-netlib/SRC/sgemqrt.c b/lapack-netlib/SRC/sgemqrt.c
new file mode 100644
index 000000000..42d3ce051
--- /dev/null
+++ b/lapack-netlib/SRC/sgemqrt.c
@@ -0,0 +1,706 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEMQRT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEMQRT + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgemqrt
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgemqrt
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgemqrt
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, */
+/*                          C, LDC, WORK, INFO ) */
+
+/*       CHARACTER SIDE, TRANS */
+/*       INTEGER   INFO, K, LDV, LDC, M, N, NB, LDT */
+/*       REAL   V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEMQRT overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                 SIDE = 'L'     SIDE = 'R' */
+/* > TRANS = 'N':      Q C            C Q */
+/* > TRANS = 'T':   Q**T C            C Q**T */
+/* > */
+/* > where Q is a real orthogonal matrix defined as the product of K */
+/* > elementary reflectors: */
+/* > */
+/* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
+/* > */
+/* > generated using the compact WY representation as returned by SGEQRT. */
+/* > */
+/* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'T':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix C. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NB */
+/* > \verbatim */
+/* >          NB is INTEGER */
+/* >          The block size used for the storage of T.  K >= NB >= 1. */
+/* >          This must be the same value of NB used to generate T */
+/* >          in CGEQRT. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,K) */
+/* >          The i-th column must contain the vector which defines the */
+/* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
+/* >          CGEQRT in the first K columns of its array argument A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If SIDE = 'L', LDA >= f2cmax(1,M); */
+/* >          if SIDE = 'R', LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,K) */
+/* >          The upper triangular factors of the block reflectors */
+/* >          as returned by CGEQRT, stored as a NB-by-N matrix. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= NB. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array. The dimension of WORK is */
+/* >           N*NB if SIDE = 'L', or  M*NB if SIDE = 'R'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgemqrt_(char *side, char *trans, integer *m, integer *n,
+	 integer *k, integer *nb, real *v, integer *ldv, real *t, integer *
+	ldt, real *c__, integer *ldc, real *work, integer *info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, 
+	    i__3, i__4;
+
+    /* Local variables */
+    logical left, tran;
+    integer i__, q;
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer ib, kf;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
+    logical notran;
+    integer ldwork;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+    tran = lsame_(trans, "T");
+    notran = lsame_(trans, "N");
+
+    if (left) {
+	ldwork = f2cmax(1,*n);
+	q = *m;
+    } else if (right) {
+	ldwork = f2cmax(1,*m);
+	q = *n;
+    }
+    if (! left && ! right) {
+	*info = -1;
+    } else if (! tran && ! notran) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*k < 0 || *k > q) {
+	*info = -5;
+    } else if (*nb < 1 || *nb > *k && *k > 0) {
+	*info = -6;
+    } else if (*ldv < f2cmax(1,q)) {
+	*info = -8;
+    } else if (*ldt < *nb) {
+	*info = -10;
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -12;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEMQRT", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+
+    if (*m == 0 || *n == 0 || *k == 0) {
+	return 0;
+    }
+
+    if (left && tran) {
+
+	i__1 = *k;
+	i__2 = *nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = *nb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4);
+	    i__3 = *m - i__ + 1;
+	    slarfb_("L", "T", "F", "C", &i__3, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork);
+	}
+
+    } else if (right && notran) {
+
+	i__2 = *k;
+	i__1 = *nb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+	    i__3 = *nb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4);
+	    i__3 = *n - i__ + 1;
+	    slarfb_("R", "N", "F", "C", m, &i__3, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork);
+	}
+
+    } else if (left && notran) {
+
+	kf = (*k - 1) / *nb * *nb + 1;
+	i__1 = -(*nb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *nb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3);
+	    i__2 = *m - i__ + 1;
+	    slarfb_("L", "N", "F", "C", &i__2, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork);
+	}
+
+    } else if (right && tran) {
+
+	kf = (*k - 1) / *nb * *nb + 1;
+	i__1 = -(*nb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *nb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3);
+	    i__2 = *n - i__ + 1;
+	    slarfb_("R", "T", "F", "C", m, &i__2, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork);
+	}
+
+    }
+
+    return 0;
+
+/*     End of SGEMQRT */
+
+} /* sgemqrt_ */
+
diff --git a/lapack-netlib/SRC/sgeql2.c b/lapack-netlib/SRC/sgeql2.c
new file mode 100644
index 000000000..e785c39ea
--- /dev/null
+++ b/lapack-netlib/SRC/sgeql2.c
@@ -0,0 +1,591 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorit
+hm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQL2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeql2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeql2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeql2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQL2 computes a QL factorization of a real m by n matrix A: */
+/* > A = Q * L. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, if m >= n, the lower triangle of the subarray */
+/* >          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */
+/* >          if m <= n, the elements on and below the (n-m)-th */
+/* >          superdiagonal contain the m by n lower trapezoidal matrix L; */
+/* >          the remaining elements, with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of elementary reflectors */
+/* >          (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
+/* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeql2_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    integer i__, k;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
+	    integer *, real *);
+    real aii;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQL2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    k = f2cmin(*m,*n);
+
+    for (i__ = k; i__ >= 1; --i__) {
+
+/*        Generate elementary reflector H(i) to annihilate */
+/*        A(1:m-k+i-1,n-k+i) */
+
+	i__1 = *m - k + i__;
+	slarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[(*n - k 
+		+ i__) * a_dim1 + 1], &c__1, &tau[i__]);
+
+/*        Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left */
+
+	aii = a[*m - k + i__ + (*n - k + i__) * a_dim1];
+	a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.f;
+	i__1 = *m - k + i__;
+	i__2 = *n - k + i__ - 1;
+	slarf_("Left", &i__1, &i__2, &a[(*n - k + i__) * a_dim1 + 1], &c__1, &
+		tau[i__], &a[a_offset], lda, &work[1]);
+	a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGEQL2 */
+
+} /* sgeql2_ */
+
diff --git a/lapack-netlib/SRC/sgeqlf.c b/lapack-netlib/SRC/sgeqlf.c
new file mode 100644
index 000000000..c792696c3
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqlf.c
@@ -0,0 +1,709 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b SGEQLF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQLF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqlf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqlf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqlf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQLF computes a QL factorization of a real M-by-N matrix A: */
+/* > A = Q * L. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if m >= n, the lower triangle of the subarray */
+/* >          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */
+/* >          if m <= n, the elements on and below the (n-m)-th */
+/* >          superdiagonal contain the M-by-N lower trapezoidal matrix L; */
+/* >          the remaining elements, with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of elementary reflectors */
+/* >          (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
+/* >          For optimum performance LWORK >= N*NB, where NB is the */
+/* >          optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
+/* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqlf_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo;
+    extern /* Subroutine */ int sgeql2_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *);
+    integer ib, nb, ki, kk, mu, nu, nx;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iws;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+
+    if (*info == 0) {
+	k = f2cmin(*m,*n);
+	if (k == 0) {
+	    lwkopt = 1;
+	} else {
+	    nb = ilaenv_(&c__1, "SGEQLF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    lwkopt = *n * nb;
+	}
+	work[1] = (real) lwkopt;
+
+	if (*lwork < f2cmax(1,*n) && ! lquery) {
+	    *info = -7;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQLF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (k == 0) {
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 1;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQLF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQLF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially. */
+/*        The last kk columns are handled by the block method. */
+
+	ki = (k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = k, i__2 = ki + nb;
+	kk = f2cmin(i__1,i__2);
+
+	i__1 = k - kk + 1;
+	i__2 = -nb;
+	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
+		+= i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the QL factorization of the current block */
+/*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */
+
+	    i__3 = *m - k + i__ + ib - 1;
+	    sgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[
+		    i__], &work[1], &iinfo);
+	    if (*n - k + i__ > 1) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i+ib-1) . . . H(i+1) H(i) */
+
+		i__3 = *m - k + i__ + ib - 1;
+		slarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + 
+			i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
+
+		i__3 = *m - k + i__ + ib - 1;
+		i__4 = *n - k + i__ - 1;
+		slarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, 
+			&i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &
+			work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
+			ldwork);
+	    }
+/* L10: */
+	}
+	mu = *m - k + i__ + nb - 1;
+	nu = *n - k + i__ + nb - 1;
+    } else {
+	mu = *m;
+	nu = *n;
+    }
+
+/*     Use unblocked code to factor the last or only block */
+
+    if (mu > 0 && nu > 0) {
+	sgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGEQLF */
+
+} /* sgeqlf_ */
+
diff --git a/lapack-netlib/SRC/sgeqp3.c b/lapack-netlib/SRC/sgeqp3.c
new file mode 100644
index 000000000..d397dba70
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqp3.c
@@ -0,0 +1,789 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b SGEQP3 */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQP3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqp3.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqp3.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqp3.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       INTEGER            JPVT( * ) */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQP3 computes a QR factorization with column pivoting of a */
+/* > matrix A:  A*P = Q*R  using Level 3 BLAS. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the upper triangle of the array contains the */
+/* >          f2cmin(M,N)-by-N upper trapezoidal matrix R; the elements below */
+/* >          the diagonal, together with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of f2cmin(M,N) elementary */
+/* >          reflectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] JPVT */
+/* > \verbatim */
+/* >          JPVT is INTEGER array, dimension (N) */
+/* >          On entry, if JPVT(J).ne.0, the J-th column of A is permuted */
+/* >          to the front of A*P (a leading column); if JPVT(J)=0, */
+/* >          the J-th column of A is a free column. */
+/* >          On exit, if JPVT(J)=K, then the J-th column of A*P was the */
+/* >          the K-th column of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO=0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= 3*N+1. */
+/* >          For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */
+/* >          is the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit. */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real/complex vector */
+/* >  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */
+/* >  A(i+1:m,i), and tau in TAU(i). */
+/* > \endverbatim */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
+/* >    X. Sun, Computer Science Dept., Duke University, USA */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqp3_(integer *m, integer *n, real *a, integer *lda, 
+	integer *jpvt, real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer nfxd;
+    extern real snrm2_(integer *, real *, integer *);
+    integer j, nbmin, minmn, minws;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *), slaqp2_(integer *, integer *, integer *, real *, 
+	    integer *, integer *, real *, real *, real *, real *);
+    integer jb, na, nb, sm, sn, nx;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    integer topbmn, sminmn;
+    extern /* Subroutine */ int slaqps_(integer *, integer *, integer *, 
+	    integer *, integer *, real *, integer *, integer *, real *, real *
+	    , real *, real *, real *, integer *);
+    integer lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    integer fjb, iws;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+/*     Test input arguments */
+/*  ==================== */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --jpvt;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+
+    if (*info == 0) {
+	minmn = f2cmin(*m,*n);
+	if (minmn == 0) {
+	    iws = 1;
+	    lwkopt = 1;
+	} else {
+	    iws = *n * 3 + 1;
+	    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    lwkopt = (*n << 1) + (*n + 1) * nb;
+	}
+	work[1] = (real) lwkopt;
+
+	if (*lwork < iws && ! lquery) {
+	    *info = -8;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQP3", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Move initial columns up front. */
+
+    nfxd = 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	if (jpvt[j] != 0) {
+	    if (j != nfxd) {
+		sswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], &
+			c__1);
+		jpvt[j] = jpvt[nfxd];
+		jpvt[nfxd] = j;
+	    } else {
+		jpvt[j] = j;
+	    }
+	    ++nfxd;
+	} else {
+	    jpvt[j] = j;
+	}
+/* L10: */
+    }
+    --nfxd;
+
+/*     Factorize fixed columns */
+/*  ======================= */
+
+/*     Compute the QR factorization of fixed columns and update */
+/*     remaining columns. */
+
+    if (nfxd > 0) {
+	na = f2cmin(*m,nfxd);
+/* CC      CALL SGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */
+	sgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info);
+/* Computing MAX */
+	i__1 = iws, i__2 = (integer) work[1];
+	iws = f2cmax(i__1,i__2);
+	if (na < *n) {
+/* CC         CALL SORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */
+/* CC  $                   TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */
+	    i__1 = *n - na;
+	    sormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, &
+		    tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, 
+		    info);
+/* Computing MAX */
+	    i__1 = iws, i__2 = (integer) work[1];
+	    iws = f2cmax(i__1,i__2);
+	}
+    }
+
+/*     Factorize free columns */
+/*  ====================== */
+
+    if (nfxd < minmn) {
+
+	sm = *m - nfxd;
+	sn = *n - nfxd;
+	sminmn = minmn - nfxd;
+
+/*        Determine the block size. */
+
+	nb = ilaenv_(&c__1, "SGEQRF", " ", &sm, &sn, &c_n1, &c_n1, (ftnlen)6, 
+		(ftnlen)1);
+	nbmin = 2;
+	nx = 0;
+
+	if (nb > 1 && nb < sminmn) {
+
+/*           Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	    i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", &sm, &sn, &c_n1, &
+		    c_n1, (ftnlen)6, (ftnlen)1);
+	    nx = f2cmax(i__1,i__2);
+
+
+	    if (nx < sminmn) {
+
+/*              Determine if workspace is large enough for blocked code. */
+
+		minws = (sn << 1) + (sn + 1) * nb;
+		iws = f2cmax(iws,minws);
+		if (*lwork < minws) {
+
+/*                 Not enough workspace to use optimal NB: Reduce NB and */
+/*                 determine the minimum value of NB. */
+
+		    nb = (*lwork - (sn << 1)) / (sn + 1);
+/* Computing MAX */
+		    i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", &sm, &sn, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		    nbmin = f2cmax(i__1,i__2);
+
+
+		}
+	    }
+	}
+
+/*        Initialize partial column norms. The first N elements of work */
+/*        store the exact column norms. */
+
+	i__1 = *n;
+	for (j = nfxd + 1; j <= i__1; ++j) {
+	    work[j] = snrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1);
+	    work[*n + j] = work[j];
+/* L20: */
+	}
+
+	if (nb >= nbmin && nb < sminmn && nx < sminmn) {
+
+/*           Use blocked code initially. */
+
+	    j = nfxd + 1;
+
+/*           Compute factorization: while loop. */
+
+
+	    topbmn = minmn - nx;
+L30:
+	    if (j <= topbmn) {
+/* Computing MIN */
+		i__1 = nb, i__2 = topbmn - j + 1;
+		jb = f2cmin(i__1,i__2);
+
+/*              Factorize JB columns among columns J:N. */
+
+		i__1 = *n - j + 1;
+		i__2 = j - 1;
+		i__3 = *n - j + 1;
+		slaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, &
+			jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n 
+			<< 1) + 1], &work[(*n << 1) + jb + 1], &i__3);
+
+		j += fjb;
+		goto L30;
+	    }
+	} else {
+	    j = nfxd + 1;
+	}
+
+/*        Use unblocked code to factor the last or only block. */
+
+
+	if (j <= minmn) {
+	    i__1 = *n - j + 1;
+	    i__2 = j - 1;
+	    slaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[
+		    j], &work[j], &work[*n + j], &work[(*n << 1) + 1]);
+	}
+
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGEQP3 */
+
+} /* sgeqp3_ */
+
diff --git a/lapack-netlib/SRC/sgeqr.c b/lapack-netlib/SRC/sgeqr.c
new file mode 100644
index 000000000..6d268fd33
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqr.c
@@ -0,0 +1,733 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+
+/* > \brief \b SGEQR */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
+/*                         INFO ) */
+
+/*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
+/*       REAL              A( LDA, * ), T( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQR computes a QR factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a M-by-M orthogonal matrix; */
+/* >    R is an upper-triangular N-by-N matrix; */
+/* >    0 is a (M-N)-by-N zero matrix, if M > N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R */
+/* >          (R is upper triangular if M >= N); */
+/* >          the elements below the diagonal are used to store part of the */
+/* >          data structure to represent Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (MAX(5,TSIZE)) */
+/* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
+/* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
+/* >          Remaining T contains part of the data structure used to represent Q. */
+/* >          If one wants to apply or construct Q, then one needs to keep T */
+/* >          (in addition to A) and pass it to further subroutines. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          If TSIZE >= 5, the dimension of the array T. */
+/* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If TSIZE = -1, the routine calculates optimal size of T for the */
+/* >          optimum performance and returns this value in T(1). */
+/* >          If TSIZE = -2, the routine calculates minimal size of T and */
+/* >          returns this value in T(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
+/* >          or optimal, if query was assumed) LWORK. */
+/* >          See LWORK for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
+/* >          optimal performance and returns this value in WORK(1). */
+/* >          If LWORK = -2, the routine calculates minimal size of WORK and */
+/* >          returns this value in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The goal of the interface is to give maximum freedom to the developers for */
+/* > creating any QR factorization algorithm they wish. The triangular */
+/* > (trapezoidal) R has to be stored in the upper part of A. The lower part of A */
+/* > and the array T can be used to store any relevant information for applying or */
+/* > constructing the Q factor. The WORK array can safely be discarded after exit. */
+/* > */
+/* > Caution: One should not expect the sizes of T and WORK to be the same from one */
+/* > LAPACK implementation to the other, or even from one execution to the other. */
+/* > A workspace query (for T and WORK) is needed at each execution. However, */
+/* > for a given execution, the size of T and WORK are fixed and will not change */
+/* > from one query to the next. */
+/* > */
+/* > \endverbatim */
+/* > */
+/* > \par Further Details particular to this LAPACK implementation: */
+/*  ============================================================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           SLATSQR or SGEQRT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, SGEQR will use either */
+/* >  SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute */
+/* >  the QR factorization. */
+/* > */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqr_(integer *m, integer *n, real *a, integer *lda, 
+	real *t, integer *tsize, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    logical mint, minw;
+    integer mb, nb, nblcks;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sgeqrt_(integer *, integer *, integer *, real 
+	    *, integer *, real *, integer *, real *, integer *);
+    logical lminws, lquery;
+    integer mintsz;
+    extern /* Subroutine */ int slatsqr_(integer *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , integer *);
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2;
+
+    mint = FALSE_;
+    minw = FALSE_;
+    if (*tsize == -2 || *lwork == -2) {
+	if (*tsize != -1) {
+	    mint = TRUE_;
+	}
+	if (*lwork != -1) {
+	    minw = TRUE_;
+	}
+    }
+
+/*     Determine the block size */
+
+    if (f2cmin(*m,*n) > 0) {
+	mb = ilaenv_(&c__1, "SGEQR ", " ", m, n, &c__1, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	nb = ilaenv_(&c__1, "SGEQR ", " ", m, n, &c__2, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+    } else {
+	mb = *m;
+	nb = 1;
+    }
+    if (mb > *m || mb <= *n) {
+	mb = *m;
+    }
+    if (nb > f2cmin(*m,*n) || nb < 1) {
+	nb = 1;
+    }
+    mintsz = *n + 5;
+    if (mb > *n && *m > *n) {
+	if ((*m - *n) % (mb - *n) == 0) {
+	    nblcks = (*m - *n) / (mb - *n);
+	} else {
+	    nblcks = (*m - *n) / (mb - *n) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+/*     Determine if the workspace size satisfies minimal size */
+
+    lminws = FALSE_;
+/* Computing MAX */
+    i__1 = 1, i__2 = nb * *n * nblcks + 5;
+    if ((*tsize < f2cmax(i__1,i__2) || *lwork < nb * *n) && *lwork >= *n && *
+	    tsize >= mintsz && ! lquery) {
+/* Computing MAX */
+	i__1 = 1, i__2 = nb * *n * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2)) {
+	    lminws = TRUE_;
+	    nb = 1;
+	    mb = *m;
+	}
+	if (*lwork < nb * *n) {
+	    lminws = TRUE_;
+	    nb = 1;
+	}
+    }
+
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = nb * *n * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
+	    *info = -6;
+	} else /* if(complicated condition) */ {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n * nb;
+	    if (*lwork < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
+		*info = -8;
+	    }
+	}
+    }
+
+    if (*info == 0) {
+	if (mint) {
+	    t[1] = (real) mintsz;
+	} else {
+	    t[1] = (real) (nb * *n * nblcks + 5);
+	}
+	t[2] = (real) mb;
+	t[3] = (real) nb;
+	if (minw) {
+	    work[1] = (real) f2cmax(1,*n);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = nb * *n;
+	    work[1] = (real) f2cmax(i__1,i__2);
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQR", &i__1, (ftnlen)5);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (f2cmin(*m,*n) == 0) {
+	return 0;
+    }
+
+/*     The QR Decomposition */
+
+    if (*m <= *n || mb <= *n || mb >= *m) {
+	sgeqrt_(m, n, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], info);
+    } else {
+	slatsqr_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], 
+		lwork, info);
+    }
+
+/* Computing MAX */
+    i__1 = 1, i__2 = nb * *n;
+    work[1] = (real) f2cmax(i__1,i__2);
+
+    return 0;
+
+/*     End of SGEQR */
+
+} /* sgeqr_ */
+
diff --git a/lapack-netlib/SRC/sgeqr2.c b/lapack-netlib/SRC/sgeqr2.c
new file mode 100644
index 000000000..69eb614bc
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqr2.c
@@ -0,0 +1,602 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorit
+hm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQR2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqr2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqr2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqr2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQR2 computes a QR factorization of a real m-by-n matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a m-by-m orthogonal matrix; */
+/* >    R is an upper-triangular n-by-n matrix; */
+/* >    0 is a (m-n)-by-n zero matrix, if m > n. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n); the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__, k;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
+	    integer *, real *);
+    real aii;
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQR2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    k = f2cmin(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1]
+		, &c__1, &tau[i__]);
+	if (i__ < *n) {
+
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGEQR2 */
+
+} /* sgeqr2_ */
+
diff --git a/lapack-netlib/SRC/sgeqr2p.c b/lapack-netlib/SRC/sgeqr2p.c
new file mode 100644
index 000000000..ee2f98679
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqr2p.c
@@ -0,0 +1,607 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagona
+l elements using an unblocked algorithm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQR2P + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqr2p
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqr2p
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqr2p
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQR2P computes a QR factorization of a real m-by-n matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a m-by-m orthogonal matrix; */
+/* >    R is an upper-triangular n-by-n matrix with nonnegative diagonal */
+/* >    entries; */
+/* >    0 is a (m-n)-by-n zero matrix, if m > n. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n). The diagonal entries of R */
+/* >          are nonnegative; the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > */
+/* > See Lapack Working Note 203 for details */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqr2p_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__, k;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *, ftnlen);
+    real aii;
+    extern /* Subroutine */ int slarfgp_(integer *, real *, real *, integer *,
+	     real *);
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQR2P", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    k = f2cmin(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	slarfgp_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * 
+		a_dim1], &c__1, &tau[i__]);
+	if (i__ < *n) {
+
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGEQR2P */
+
+} /* sgeqr2p_ */
+
diff --git a/lapack-netlib/SRC/sgeqrf.c b/lapack-netlib/SRC/sgeqrf.c
new file mode 100644
index 000000000..e642999f1
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqrf.c
@@ -0,0 +1,700 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b SGEQRF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQRF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQRF computes a QR factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a M-by-M orthogonal matrix; */
+/* >    R is an upper-triangular N-by-N matrix; */
+/* >    0 is a (M-N)-by-N zero matrix, if M > N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n); the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of f2cmin(m,n) elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
+/* >          For optimum performance LWORK >= N*NB, where NB is */
+/* >          the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo;
+    extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *);
+    integer ib, nb, nx;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iws;
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *n * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQRF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the QR factorization of the current block */
+/*           A(i:m,i:i+ib-1) */
+
+	    i__3 = *m - i__ + 1;
+	    sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
+
+		i__3 = *m - i__ + 1;
+		slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H**T to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
+			+ 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGEQRF */
+
+} /* sgeqrf_ */
+
diff --git a/lapack-netlib/SRC/sgeqrfp.c b/lapack-netlib/SRC/sgeqrfp.c
new file mode 100644
index 000000000..ef5362657
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqrfp.c
@@ -0,0 +1,703 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b SGEQRFP */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQRFP + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrfp
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrfp
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrfp
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQR2P computes a QR factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a M-by-M orthogonal matrix; */
+/* >    R is an upper-triangular N-by-N matrix with nonnegative diagonal */
+/* >    entries; */
+/* >    0 is a (M-N)-by-N zero matrix, if M > N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n). The diagonal entries of R */
+/* >          are nonnegative; the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of f2cmin(m,n) elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
+/* >          For optimum performance LWORK >= N*NB, where NB is */
+/* >          the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > */
+/* > See Lapack Working Note 203 for details */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqrfp_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo, ib, nb, nx;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *);
+    integer ldwork, lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sgeqr2p_(integer *, integer *, real *, 
+	    integer *, real *, real *, integer *);
+    integer iws;
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *n * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQRFP", &i__1, (ftnlen)7);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the QR factorization of the current block */
+/*           A(i:m,i:i+ib-1) */
+
+	    i__3 = *m - i__ + 1;
+	    sgeqr2p_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
+
+		i__3 = *m - i__ + 1;
+		slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H**T to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
+			+ 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	sgeqr2p_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		1], &iinfo);
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGEQRFP */
+
+} /* sgeqrfp_ */
+
diff --git a/lapack-netlib/SRC/sgeqrt.c b/lapack-netlib/SRC/sgeqrt.c
new file mode 100644
index 000000000..6364bfee7
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqrt.c
@@ -0,0 +1,628 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGEQRT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQRT + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrt.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrt.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrt.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) */
+
+/*       INTEGER INFO, LDA, LDT, M, N, NB */
+/*       REAL A( LDA, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQRT computes a blocked QR factorization of a real M-by-N matrix A */
+/* > using the compact WY representation of Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NB */
+/* > \verbatim */
+/* >          NB is INTEGER */
+/* >          The block size to be used in the blocked QR.  MIN(M,N) >= NB >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
+/* >          upper triangular if M >= N); the elements below the diagonal */
+/* >          are the columns of V. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,MIN(M,N)) */
+/* >          The upper triangular block reflectors stored in compact form */
+/* >          as a sequence of upper triangular blocks.  See below */
+/* >          for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= NB. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (NB*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
+/* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1       ) */
+/* >                   ( v1  1    ) */
+/* >                   ( v1 v2  1 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
+/* > */
+/* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/NB), where each */
+/* >  block is of order NB except for the last block, which is of order */
+/* >  IB = K - (B-1)*NB.  For each of the B blocks, a upper triangular block */
+/* >  reflector factor is computed: T1, T2, ..., TB.  The NB-by-NB (and IB-by-IB */
+/* >  for the last block) T's are stored in the NB-by-K matrix T as */
+/* > */
+/* >               T = (T1 T2 ... TB). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqrt_(integer *m, integer *n, integer *nb, real *a, 
+	integer *lda, real *t, integer *ldt, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    integer i__, k, iinfo, ib;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen), sgeqrt2_(
+	    integer *, integer *, real *, integer *, real *, integer *, 
+	    integer *), sgeqrt3_(integer *, integer *, real *, integer *, 
+	    real *, integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/* ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nb < 1 || *nb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldt < *nb) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQRT", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	return 0;
+    }
+
+/*     Blocked loop of length K */
+
+    i__1 = k;
+    i__2 = *nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	i__3 = k - i__ + 1;
+	ib = f2cmin(i__3,*nb);
+
+/*     Compute the QR factorization of the current block A(I:M,I:I+IB-1) */
+
+	if (TRUE_) {
+	    i__3 = *m - i__ + 1;
+	    sgeqrt3_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 
+		    + 1], ldt, &iinfo);
+	} else {
+	    i__3 = *m - i__ + 1;
+	    sgeqrt2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 
+		    + 1], ldt, &iinfo);
+	}
+	if (i__ + ib <= *n) {
+
+/*     Update by applying H**T to A(I:M,I+IB:N) from the left */
+
+	    i__3 = *m - i__ + 1;
+	    i__4 = *n - i__ - ib + 1;
+	    i__5 = *n - i__ - ib + 1;
+	    slarfb_("L", "T", "F", "C", &i__3, &i__4, &ib, &a[i__ + i__ * 
+		    a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + (i__ + 
+		    ib) * a_dim1], lda, &work[1], &i__5);
+	}
+    }
+    return 0;
+
+/*     End of SGEQRT */
+
+} /* sgeqrt_ */
+
diff --git a/lapack-netlib/SRC/sgeqrt2.c b/lapack-netlib/SRC/sgeqrt2.c
new file mode 100644
index 000000000..b778e6711
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqrt2.c
@@ -0,0 +1,645 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b5 = 1.f;
+static real c_b7 = 0.f;
+
+/* > \brief \b SGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY re
+presentation of Q. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQRT2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrt2
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrt2
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrt2
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGEQRT2( M, N, A, LDA, T, LDT, INFO ) */
+
+/*       INTEGER   INFO, LDA, LDT, M, N */
+/*       REAL   A( LDA, * ), T( LDT, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQRT2 computes a QR factorization of a real M-by-N matrix A, */
+/* > using the compact WY representation of Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
+/* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
+/* >          elements below the diagonal are the columns of V.  See below for */
+/* >          further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,N) */
+/* >          The N-by-N upper triangular factor of the block reflector. */
+/* >          The elements on and above the diagonal contain the block */
+/* >          reflector T; the elements below the diagonal are not used. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
+/* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1       ) */
+/* >                   ( v1  1    ) */
+/* >                   ( v1 v2  1 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
+/* >  block reflector H is then given by */
+/* > */
+/* >               H = I - V * T * V**T */
+/* > */
+/* >  where V**T is the transpose of V. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqrt2_(integer *m, integer *n, real *a, integer *lda, 
+	real *t, integer *ldt, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
+	    integer *, real *, integer *, real *, integer *);
+    integer i__, k;
+    real alpha;
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *), strmv_(char *, char *, char *, integer *, real *, 
+	    integer *, real *, integer *), xerbla_(
+	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
+	    integer *, real *);
+    real aii;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*ldt < f2cmax(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQRT2", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    k = f2cmin(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1]
+		, &c__1, &t[i__ + t_dim1]);
+	if (i__ < *n) {
+
+/*           Apply H(i) to A(I:M,I+1:N) from the left */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+
+/*           W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    sgemv_("T", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], 
+		    lda, &a[i__ + i__ * a_dim1], &c__1, &c_b7, &t[*n * t_dim1 
+		    + 1], &c__1);
+
+/*           A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H */
+
+	    alpha = -t[i__ + t_dim1];
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    sger_(&i__2, &i__3, &alpha, &a[i__ + i__ * a_dim1], &c__1, &t[*n *
+		     t_dim1 + 1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+    }
+
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	aii = a[i__ + i__ * a_dim1];
+	a[i__ + i__ * a_dim1] = 1.f;
+
+/*        T(1:I-1,I) := alpha * A(I:M,1:I-1)**T * A(I:M,I) */
+
+	alpha = -t[i__ + t_dim1];
+	i__2 = *m - i__ + 1;
+	i__3 = i__ - 1;
+	sgemv_("T", &i__2, &i__3, &alpha, &a[i__ + a_dim1], lda, &a[i__ + i__ 
+		* a_dim1], &c__1, &c_b7, &t[i__ * t_dim1 + 1], &c__1);
+	a[i__ + i__ * a_dim1] = aii;
+
+/*        T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) */
+
+	i__2 = i__ - 1;
+	strmv_("U", "N", "N", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], 
+		&c__1);
+
+/*           T(I,I) = tau(I) */
+
+	t[i__ + i__ * t_dim1] = t[i__ + t_dim1];
+	t[i__ + t_dim1] = 0.f;
+    }
+
+/*     End of SGEQRT2 */
+
+    return 0;
+} /* sgeqrt2_ */
+
diff --git a/lapack-netlib/SRC/sgeqrt3.c b/lapack-netlib/SRC/sgeqrt3.c
new file mode 100644
index 000000000..7154346c4
--- /dev/null
+++ b/lapack-netlib/SRC/sgeqrt3.c
@@ -0,0 +1,678 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b8 = 1.f;
+static real c_b20 = -1.f;
+
+/* > \brief \b SGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the c
+ompact WY representation of Q. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGEQRT3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgeqrt3
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgeqrt3
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgeqrt3
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*        SUBROUTINE SGEQRT3( M, N, A, LDA, T, LDT, INFO ) */
+
+/*       INTEGER   INFO, LDA, M, N, LDT */
+/*       REAL   A( LDA, * ), T( LDT, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGEQRT3 recursively computes a QR factorization of a real M-by-N */
+/* > matrix A, using the compact WY representation of Q. */
+/* > */
+/* > Based on the algorithm of Elmroth and Gustavson, */
+/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
+/* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
+/* >          elements below the diagonal are the columns of V.  See below for */
+/* >          further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,N) */
+/* >          The N-by-N upper triangular factor of the block reflector. */
+/* >          The elements on and above the diagonal contain the block */
+/* >          reflector T; the elements below the diagonal are not used. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
+/* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1       ) */
+/* >                   ( v1  1    ) */
+/* >                   ( v1 v2  1 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
+/* >  block reflector H is then given by */
+/* > */
+/* >               H = I - V * T * V**T */
+/* > */
+/* >  where V**T is the transpose of V. */
+/* > */
+/* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgeqrt3_(integer *m, integer *n, real *a, integer *lda, 
+	real *t, integer *ldt, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2;
+
+    /* Local variables */
+    integer i__, j, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *);
+    integer i1, j1, n1, n2;
+    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), xerbla_(char *, integer *, ftnlen), slarfg_(integer *, real *, real *, integer *, real *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -2;
+    } else if (*m < *n) {
+	*info = -1;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*ldt < f2cmax(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQRT3", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    if (*n == 1) {
+
+/*        Compute Householder transform when N=1 */
+
+	slarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1, &t[t_dim1 + 
+		1]);
+
+    } else {
+
+/*        Otherwise, split A into blocks... */
+
+	n1 = *n / 2;
+	n2 = *n - n1;
+/* Computing MIN */
+	i__1 = n1 + 1;
+	j1 = f2cmin(i__1,*n);
+/* Computing MIN */
+	i__1 = *n + 1;
+	i1 = f2cmin(i__1,*m);
+
+/*        Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */
+
+	sgeqrt3_(m, &n1, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);
+
+/*        Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] */
+
+	i__1 = n2;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = n1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		t[i__ + (j + n1) * t_dim1] = a[i__ + (j + n1) * a_dim1];
+	    }
+	}
+	strmm_("L", "L", "T", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 *
+		 t_dim1 + 1], ldt)
+		;
+
+	i__1 = *m - n1;
+	sgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[j1 + a_dim1], lda, &a[j1 
+		+ j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);
+
+	strmm_("L", "U", "T", "N", &n1, &n2, &c_b8, &t[t_offset], ldt, &t[j1 *
+		 t_dim1 + 1], ldt)
+		;
+
+	i__1 = *m - n1;
+	sgemm_("N", "N", &i__1, &n2, &n1, &c_b20, &a[j1 + a_dim1], lda, &t[j1 
+		* t_dim1 + 1], ldt, &c_b8, &a[j1 + j1 * a_dim1], lda);
+
+	strmm_("L", "L", "N", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 *
+		 t_dim1 + 1], ldt)
+		;
+
+	i__1 = n2;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = n1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + (j + n1) * a_dim1] -= t[i__ + (j + n1) * t_dim1];
+	    }
+	}
+
+/*        Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */
+
+	i__1 = *m - n1;
+	sgeqrt3_(&i__1, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1], 
+		ldt, &iinfo);
+
+/*        Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 */
+
+	i__1 = n1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = n2;
+	    for (j = 1; j <= i__2; ++j) {
+		t[i__ + (j + n1) * t_dim1] = a[j + n1 + i__ * a_dim1];
+	    }
+	}
+
+	strmm_("R", "L", "N", "U", &n1, &n2, &c_b8, &a[j1 + j1 * a_dim1], lda,
+		 &t[j1 * t_dim1 + 1], ldt);
+
+	i__1 = *m - *n;
+	sgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[i1 + a_dim1], lda, &a[i1 
+		+ j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);
+
+	strmm_("L", "U", "N", "N", &n1, &n2, &c_b20, &t[t_offset], ldt, &t[j1 
+		* t_dim1 + 1], ldt);
+
+	strmm_("R", "U", "N", "N", &n1, &n2, &c_b8, &t[j1 + j1 * t_dim1], ldt,
+		 &t[j1 * t_dim1 + 1], ldt);
+
+/*        Y = (Y1,Y2); R = [ R1  A(1:N1,J1:N) ];  T = [T1 T3] */
+/*                         [  0        R2     ]       [ 0 T2] */
+
+    }
+
+    return 0;
+
+/*     End of SGEQRT3 */
+
+} /* sgeqrt3_ */
+
diff --git a/lapack-netlib/SRC/sgerfs.c b/lapack-netlib/SRC/sgerfs.c
new file mode 100644
index 000000000..5fa56c801
--- /dev/null
+++ b/lapack-netlib/SRC/sgerfs.c
@@ -0,0 +1,879 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b15 = -1.f;
+static real c_b17 = 1.f;
+
+/* > \brief \b SGERFS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGERFS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgerfs.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgerfs.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgerfs.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */
+/*                          X, LDX, FERR, BERR, WORK, IWORK, INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGERFS improves the computed solution to a system of linear */
+/* > equations and provides error bounds and backward error estimates for */
+/* > the solution. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          The original N-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AF */
+/* > \verbatim */
+/* >          AF is REAL array, dimension (LDAF,N) */
+/* >          The factors L and U from the factorization A = P*L*U */
+/* >          as computed by SGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices from SGETRF; for 1<=i<=N, row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          The right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (LDX,NRHS) */
+/* >          On entry, the solution matrix X, as computed by SGETRS. */
+/* >          On exit, the improved solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is REAL array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is REAL array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (3*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/* > \par Internal Parameters: */
+/*  ========================= */
+/* > */
+/* > \verbatim */
+/* >  ITMAX is the maximum number of steps of iterative refinement. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgerfs_(char *trans, integer *n, integer *nrhs, real *a, 
+	integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, 
+	integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *
+	work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, i__1, i__2, i__3;
+    real r__1, r__2, r__3;
+
+    /* Local variables */
+    integer kase;
+    real safe1, safe2;
+    integer i__, j, k;
+    real s;
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    integer count;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
+	    integer *), slacn2_(integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    real xk;
+    extern real slamch_(char *);
+    integer nz;
+    real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical notran;
+    extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
+	    integer *, integer *, real *, integer *, integer *);
+    char transt[1];
+    real lstres, eps;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -10;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGERFS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    ferr[j] = 0.f;
+	    berr[j] = 0.f;
+/* L10: */
+	}
+	return 0;
+    }
+
+    if (notran) {
+	*(unsigned char *)transt = 'T';
+    } else {
+	*(unsigned char *)transt = 'N';
+    }
+
+/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */
+
+    nz = *n + 1;
+    eps = slamch_("Epsilon");
+    safmin = slamch_("Safe minimum");
+    safe1 = nz * safmin;
+    safe2 = safe1 / eps;
+
+/*     Do for each right hand side */
+
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+
+	count = 1;
+	lstres = 3.f;
+L20:
+
+/*        Loop until stopping criterion is satisfied. */
+
+/*        Compute residual R = B - op(A) * X, */
+/*        where op(A) = A, A**T, or A**H, depending on TRANS. */
+
+	scopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
+	sgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], &
+		c__1, &c_b17, &work[*n + 1], &c__1);
+
+/*        Compute componentwise relative backward error from formula */
+
+/*        f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */
+
+/*        where abs(Z) is the componentwise absolute value of the matrix */
+/*        or vector Z.  If the i-th component of the denominator is less */
+/*        than SAFE2, then SAFE1 is added to the i-th components of the */
+/*        numerator and denominator before dividing. */
+
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1));
+/* L30: */
+	}
+
+/*        Compute abs(op(A))*abs(X) + abs(B). */
+
+	if (notran) {
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		xk = (r__1 = x[k + j * x_dim1], abs(r__1));
+		i__3 = *n;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    work[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * xk;
+/* L40: */
+		}
+/* L50: */
+	    }
+	} else {
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		s = 0.f;
+		i__3 = *n;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    s += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = x[
+			    i__ + j * x_dim1], abs(r__2));
+/* L60: */
+		}
+		work[k] += s;
+/* L70: */
+	    }
+	}
+	s = 0.f;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+/* Computing MAX */
+		r__2 = s, r__3 = (r__1 = work[*n + i__], abs(r__1)) / work[
+			i__];
+		s = f2cmax(r__2,r__3);
+	    } else {
+/* Computing MAX */
+		r__2 = s, r__3 = ((r__1 = work[*n + i__], abs(r__1)) + safe1) 
+			/ (work[i__] + safe1);
+		s = f2cmax(r__2,r__3);
+	    }
+/* L80: */
+	}
+	berr[j] = s;
+
+/*        Test stopping criterion. Continue iterating if */
+/*           1) The residual BERR(J) is larger than machine epsilon, and */
+/*           2) BERR(J) decreased by at least a factor of 2 during the */
+/*              last iteration, and */
+/*           3) At most ITMAX iterations tried. */
+
+	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {
+
+/*           Update solution and try again. */
+
+	    sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n 
+		    + 1], n, info);
+	    saxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
+		    ;
+	    lstres = berr[j];
+	    ++count;
+	    goto L20;
+	}
+
+/*        Bound error from formula */
+
+/*        norm(X - XTRUE) / norm(X) .le. FERR = */
+/*        norm( abs(inv(op(A)))* */
+/*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */
+
+/*        where */
+/*          norm(Z) is the magnitude of the largest component of Z */
+/*          inv(op(A)) is the inverse of op(A) */
+/*          abs(Z) is the componentwise absolute value of the matrix or */
+/*             vector Z */
+/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
+/*          EPS is machine epsilon */
+
+/*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
+/*        is incremented by SAFE1 if the i-th component of */
+/*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */
+
+/*        Use SLACN2 to estimate the infinity-norm of the matrix */
+/*           inv(op(A)) * diag(W), */
+/*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
+
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
+			work[i__];
+	    } else {
+		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
+			work[i__] + safe1;
+	    }
+/* L90: */
+	}
+
+	kase = 0;
+L100:
+	slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
+		kase, isave);
+	if (kase != 0) {
+	    if (kase == 1) {
+
+/*              Multiply by diag(W)*inv(op(A)**T). */
+
+		sgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
+			work[*n + 1], n, info);
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] = work[i__] * work[*n + i__];
+/* L110: */
+		}
+	    } else {
+
+/*              Multiply by inv(op(A))*diag(W). */
+
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] = work[i__] * work[*n + i__];
+/* L120: */
+		}
+		sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
+			work[*n + 1], n, info);
+	    }
+	    goto L100;
+	}
+
+/*        Normalize error. */
+
+	lstres = 0.f;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], abs(r__1));
+	    lstres = f2cmax(r__2,r__3);
+/* L130: */
+	}
+	if (lstres != 0.f) {
+	    ferr[j] /= lstres;
+	}
+
+/* L140: */
+    }
+
+    return 0;
+
+/*     End of SGERFS */
+
+} /* sgerfs_ */
+
diff --git a/lapack-netlib/SRC/sgerfsx.c b/lapack-netlib/SRC/sgerfsx.c
new file mode 100644
index 000000000..fb494094f
--- /dev/null
+++ b/lapack-netlib/SRC/sgerfsx.c
@@ -0,0 +1,1146 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__0 = 0;
+static integer c__1 = 1;
+
+/* > \brief \b SGERFSX */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGERFSX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgerfsx
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgerfsx
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgerfsx
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, */
+/*                           R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, */
+/*                           ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, */
+/*                           WORK, IWORK, INFO ) */
+
+/*       CHARACTER          TRANS, EQUED */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */
+/*      $                   N_ERR_BNDS */
+/*       REAL               RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   X( LDX , * ), WORK( * ) */
+/*       REAL               R( * ), C( * ), PARAMS( * ), BERR( * ), */
+/*      $                   ERR_BNDS_NORM( NRHS, * ), */
+/*      $                   ERR_BNDS_COMP( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    SGERFSX improves the computed solution to a system of linear */
+/* >    equations and provides error bounds and backward error estimates */
+/* >    for the solution.  In addition to normwise error bound, the code */
+/* >    provides maximum componentwise error bound if possible.  See */
+/* >    comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */
+/* >    error bounds. */
+/* > */
+/* >    The original system of linear equations may have been equilibrated */
+/* >    before calling this routine, as described by arguments EQUED, R */
+/* >    and C below. In this case, the solution and error bounds returned */
+/* >    are for the original unequilibrated system. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \verbatim */
+/* >     Some optional parameters are bundled in the PARAMS array.  These */
+/* >     settings determine how refinement is performed, but often the */
+/* >     defaults are acceptable.  If the defaults are acceptable, users */
+/* >     can pass NPARAMS = 0 which prevents the source code from accessing */
+/* >     the PARAMS argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >     Specifies the form of equilibration that was done to A */
+/* >     before calling this routine. This is needed to compute */
+/* >     the solution and error bounds correctly. */
+/* >       = 'N':  No equilibration */
+/* >       = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >               diag(R). */
+/* >       = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >               by diag(C). */
+/* >       = 'B':  Both row and column equilibration, i.e., A has been */
+/* >               replaced by diag(R) * A * diag(C). */
+/* >               The right hand side B has been changed accordingly. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right hand sides, i.e., the number of columns */
+/* >     of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >     The original N-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >     The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AF */
+/* > \verbatim */
+/* >          AF is REAL array, dimension (LDAF,N) */
+/* >     The factors L and U from the factorization A = P*L*U */
+/* >     as computed by SGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >     The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     The pivot indices from SGETRF; for 1<=i<=N, row i of the */
+/* >     matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] R */
+/* > \verbatim */
+/* >          R is REAL array, dimension (N) */
+/* >     The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >     multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >     is not accessed. */
+/* >     If R is accessed, each element of R should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >     The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >     multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >     is not accessed. */
+/* >     If C is accessed, each element of C should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >     The right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (LDX,NRHS) */
+/* >     On entry, the solution matrix X, as computed by SGETRS. */
+/* >     On exit, the improved solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >     The leading dimension of the array X.  LDX >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is REAL array, dimension (NRHS) */
+/* >     Componentwise relative backward error.  This is the */
+/* >     componentwise relative backward error of each solution vector X(j) */
+/* >     (i.e., the smallest relative change in any element of A or B that */
+/* >     makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_ERR_BNDS */
+/* > \verbatim */
+/* >          N_ERR_BNDS is INTEGER */
+/* >     Number of error bounds to return for each right hand side */
+/* >     and each type (normwise or componentwise).  See ERR_BNDS_NORM and */
+/* >     ERR_BNDS_COMP below. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_NORM */
+/* > \verbatim */
+/* >          ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_COMP */
+/* > \verbatim */
+/* >          ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NPARAMS */
+/* > \verbatim */
+/* >          NPARAMS is INTEGER */
+/* >     Specifies the number of parameters set in PARAMS.  If <= 0, the */
+/* >     PARAMS array is never referenced and default values are used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] PARAMS */
+/* > \verbatim */
+/* >          PARAMS is REAL array, dimension NPARAMS */
+/* >     Specifies algorithm parameters.  If an entry is < 0.0, then */
+/* >     that entry will be filled with default value used for that */
+/* >     parameter.  Only positions up to NPARAMS are accessed; defaults */
+/* >     are used for higher-numbered parameters. */
+/* > */
+/* >       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */
+/* >            refinement or not. */
+/* >         Default: 1.0 */
+/* >            = 0.0:  No refinement is performed, and no error bounds are */
+/* >                    computed. */
+/* >            = 1.0:  Use the double-precision refinement algorithm, */
+/* >                    possibly with doubled-single computations if the */
+/* >                    compilation environment does not support DOUBLE */
+/* >                    PRECISION. */
+/* >              (other values are reserved for future use) */
+/* > */
+/* >       PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */
+/* >            computations allowed for refinement. */
+/* >         Default: 10 */
+/* >         Aggressive: Set to 100 to permit convergence using approximate */
+/* >                     factorizations or factorizations other than LU. If */
+/* >                     the factorization uses a technique other than */
+/* >                     Gaussian elimination, the guarantees in */
+/* >                     err_bnds_norm and err_bnds_comp may no longer be */
+/* >                     trustworthy. */
+/* > */
+/* >       PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */
+/* >            will attempt to find a solution with small componentwise */
+/* >            relative error in the double-precision algorithm.  Positive */
+/* >            is true, 0.0 is false. */
+/* >         Default: 1.0 (attempt componentwise convergence) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. The solution to every right-hand side is */
+/* >         guaranteed. */
+/* >       < 0:  If INFO = -i, the i-th argument had an illegal value */
+/* >       > 0 and <= N:  U(INFO,INFO) is exactly zero.  The factorization */
+/* >         has been completed, but the factor U is exactly singular, so */
+/* >         the solution and error bounds could not be computed. RCOND = 0 */
+/* >         is returned. */
+/* >       = N+J: The solution corresponding to the Jth right-hand side is */
+/* >         not guaranteed. The solutions corresponding to other right- */
+/* >         hand sides K with K > J may not be guaranteed as well, but */
+/* >         only the first such right-hand side is reported. If a small */
+/* >         componentwise error is not requested (PARAMS(3) = 0.0) then */
+/* >         the Jth right-hand side is the first with a normwise error */
+/* >         bound that is not guaranteed (the smallest J such */
+/* >         that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */
+/* >         the Jth right-hand side is the first with either a normwise or */
+/* >         componentwise error bound that is not guaranteed (the smallest */
+/* >         J such that either ERR_BNDS_NORM(J,1) = 0.0 or */
+/* >         ERR_BNDS_COMP(J,1) = 0.0). See the definition of */
+/* >         ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */
+/* >         about all of the right-hand sides check ERR_BNDS_NORM or */
+/* >         ERR_BNDS_COMP. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgerfsx_(char *trans, char *equed, integer *n, integer *
+	nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, 
+	real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, 
+	real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__,
+	 real *err_bnds_comp__, integer *nparams, real *params, real *work, 
+	integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, 
+	    err_bnds_comp_dim1, err_bnds_comp_offset, i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    real illrcond_thresh__, unstable_thresh__;
+    extern /* Subroutine */ int sla_gerfsx_extended_(integer *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, 
+	    integer *, logical *, real *, real *, integer *, real *, integer *
+	    , real *, integer *, real *, real *, real *, real *, real *, real 
+	    *, real *, integer *, real *, real *, logical *, integer *);
+    real err_lbnd__;
+    char norm[1];
+    integer ref_type__;
+    extern integer ilatrans_(char *);
+    logical ignore_cwise__;
+    integer j;
+    extern logical lsame_(char *, char *);
+    real anorm, rcond_tmp__;
+    integer prec_type__;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgecon_(
+	    char *, integer *, real *, integer *, real *, real *, real *, 
+	    integer *, integer *);
+    logical colequ, notran, rowequ;
+    integer trans_type__;
+    extern integer ilaprec_(char *);
+    extern real sla_gercond_(char *, integer *, real *, integer *, real *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *);
+    integer ithresh, n_norms__;
+    real rthresh, cwise_wrong__;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ================================================================== */
+
+
+/*     Check the input parameters. */
+
+    /* Parameter adjustments */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --berr;
+    --params;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    trans_type__ = ilatrans_(trans);
+    ref_type__ = 1;
+    if (*nparams >= 1) {
+	if (params[1] < 0.f) {
+	    params[1] = 1.f;
+	} else {
+	    ref_type__ = params[1];
+	}
+    }
+
+/*     Set default parameters. */
+
+    illrcond_thresh__ = (real) (*n) * slamch_("Epsilon");
+    ithresh = 10;
+    rthresh = .5f;
+    unstable_thresh__ = .25f;
+    ignore_cwise__ = FALSE_;
+
+    if (*nparams >= 2) {
+	if (params[2] < 0.f) {
+	    params[2] = (real) ithresh;
+	} else {
+	    ithresh = (integer) params[2];
+	}
+    }
+    if (*nparams >= 3) {
+	if (params[3] < 0.f) {
+	    if (ignore_cwise__) {
+		params[3] = 0.f;
+	    } else {
+		params[3] = 1.f;
+	    }
+	} else {
+	    ignore_cwise__ = params[3] == 0.f;
+	}
+    }
+    if (ref_type__ == 0 || *n_err_bnds__ == 0) {
+	n_norms__ = 0;
+    } else if (ignore_cwise__) {
+	n_norms__ = 1;
+    } else {
+	n_norms__ = 2;
+    }
+
+    notran = lsame_(trans, "N");
+    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
+    colequ = lsame_(equed, "C") || lsame_(equed, "B");
+
+/*     Test input parameters. */
+
+    if (trans_type__ == -1) {
+	*info = -1;
+    } else if (! rowequ && ! colequ && ! lsame_(equed, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -8;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -13;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -15;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGERFSX", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0 || *nrhs == 0) {
+	*rcond = 1.f;
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    berr[j] = 0.f;
+	    if (*n_err_bnds__ >= 1) {
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 1.f;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 1.f;
+	    }
+	    if (*n_err_bnds__ >= 2) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.f;
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.f;
+	    }
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.f;
+		err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.f;
+	    }
+	}
+	return 0;
+    }
+
+/*     Default to failure. */
+
+    *rcond = 0.f;
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	berr[j] = 1.f;
+	if (*n_err_bnds__ >= 1) {
+	    err_bnds_norm__[j + err_bnds_norm_dim1] = 1.f;
+	    err_bnds_comp__[j + err_bnds_comp_dim1] = 1.f;
+	}
+	if (*n_err_bnds__ >= 2) {
+	    err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.f;
+	    err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.f;
+	}
+	if (*n_err_bnds__ >= 3) {
+	    err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.f;
+	    err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.f;
+	}
+    }
+
+/*     Compute the norm of A and the reciprocal of the condition */
+/*     number of A. */
+
+    if (notran) {
+	*(unsigned char *)norm = 'I';
+    } else {
+	*(unsigned char *)norm = '1';
+    }
+    anorm = slange_(norm, n, n, &a[a_offset], lda, &work[1]);
+    sgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1],
+	     info);
+
+/*     Perform refinement on each right-hand side */
+
+    if (ref_type__ != 0) {
+	prec_type__ = ilaprec_("D");
+	if (notran) {
+	    sla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[
+		    a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &colequ, &
+		    c__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], &
+		    n_norms__, &err_bnds_norm__[err_bnds_norm_offset], &
+		    err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], &
+		    work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, 
+		    &rthresh, &unstable_thresh__, &ignore_cwise__, info);
+	} else {
+	    sla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[
+		    a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &rowequ, &
+		    r__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], &
+		    n_norms__, &err_bnds_norm__[err_bnds_norm_offset], &
+		    err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], &
+		    work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, 
+		    &rthresh, &unstable_thresh__, &ignore_cwise__, info);
+	}
+    }
+/* Computing MAX */
+    r__1 = 10.f, r__2 = sqrt((real) (*n));
+    err_lbnd__ = f2cmax(r__1,r__2) * slamch_("Epsilon");
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 1) {
+
+/*     Compute scaled normwise condition number cond(A*C). */
+
+	if (colequ && notran) {
+	    rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[
+		    af_offset], ldaf, &ipiv[1], &c_n1, &c__[1], info, &work[1]
+		    , &iwork[1]);
+	} else if (rowequ && ! notran) {
+	    rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[
+		    af_offset], ldaf, &ipiv[1], &c_n1, &r__[1], info, &work[1]
+		    , &iwork[1]);
+	} else {
+	    rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[
+		    af_offset], ldaf, &ipiv[1], &c__0, &r__[1], info, &work[1]
+		    , &iwork[1]);
+	}
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+
+/*     Cap the error at 1.0. */
+
+	    if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 
+		    << 1)] > 1.f) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.f;
+	    }
+
+/*     Threshold the error (see LAWN). */
+
+	    if (rcond_tmp__ < illrcond_thresh__) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.f;
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 0.f;
+		if (*info <= *n) {
+		    *info = *n + j;
+		}
+	    } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < 
+		    err_lbnd__) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__;
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 1.f;
+	    }
+
+/*     Save the condition number. */
+
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__;
+	    }
+	}
+    }
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 2) {
+
+/*     Compute componentwise condition number cond(A*diag(Y(:,J))) for */
+/*     each right-hand side using the current solution as an estimate of */
+/*     the true solution.  If the componentwise error estimate is too */
+/*     large, then the solution is a lousy estimate of truth and the */
+/*     estimated RCOND may be too optimistic.  To avoid misleading users, */
+/*     the inverse condition number is set to 0.0 when the estimated */
+/*     cwise error is at least CWISE_WRONG. */
+
+	cwise_wrong__ = sqrt(slamch_("Epsilon"));
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < 
+		    cwise_wrong__) {
+		rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[
+			af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1],
+			 info, &work[1], &iwork[1]);
+	    } else {
+		rcond_tmp__ = 0.f;
+	    }
+
+/*     Cap the error at 1.0. */
+
+	    if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 
+		    << 1)] > 1.f) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.f;
+	    }
+
+/*     Threshold the error (see LAWN). */
+
+	    if (rcond_tmp__ < illrcond_thresh__) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.f;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 0.f;
+		if (params[3] == 1.f && *info < *n + j) {
+		    *info = *n + j;
+		}
+	    } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < 
+		    err_lbnd__) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 1.f;
+	    }
+
+/*     Save the condition number. */
+
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SGERFSX */
+
+} /* sgerfsx_ */
+
diff --git a/lapack-netlib/SRC/sgerq2.c b/lapack-netlib/SRC/sgerq2.c
new file mode 100644
index 000000000..ec6d0a5e8
--- /dev/null
+++ b/lapack-netlib/SRC/sgerq2.c
@@ -0,0 +1,587 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorit
+hm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGERQ2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgerq2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgerq2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgerq2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGERQ2 computes an RQ factorization of a real m by n matrix A: */
+/* > A = R * Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, if m <= n, the upper triangle of the subarray */
+/* >          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */
+/* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
+/* >          contain the m by n upper trapezoidal matrix R; the remaining */
+/* >          elements, with the array TAU, represent the orthogonal matrix */
+/* >          Q as a product of elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (M) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
+/* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgerq2_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    integer i__, k;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *, ftnlen), slarfg_(integer *, real *, real *, 
+	    integer *, real *);
+    real aii;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGERQ2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    k = f2cmin(*m,*n);
+
+    for (i__ = k; i__ >= 1; --i__) {
+
+/*        Generate elementary reflector H(i) to annihilate */
+/*        A(m-k+i,1:n-k+i-1) */
+
+	i__1 = *n - k + i__;
+	slarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[*m - k 
+		+ i__ + a_dim1], lda, &tau[i__]);
+
+/*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right */
+
+	aii = a[*m - k + i__ + (*n - k + i__) * a_dim1];
+	a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.f;
+	i__1 = *m - k + i__ - 1;
+	i__2 = *n - k + i__;
+	slarf_("Right", &i__1, &i__2, &a[*m - k + i__ + a_dim1], lda, &tau[
+		i__], &a[a_offset], lda, &work[1]);
+	a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGERQ2 */
+
+} /* sgerq2_ */
+
diff --git a/lapack-netlib/SRC/sgerqf.c b/lapack-netlib/SRC/sgerqf.c
new file mode 100644
index 000000000..5a0024da9
--- /dev/null
+++ b/lapack-netlib/SRC/sgerqf.c
@@ -0,0 +1,711 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b SGERQF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGERQF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgerqf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgerqf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgerqf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       REAL               A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGERQF computes an RQ factorization of a real M-by-N matrix A: */
+/* > A = R * Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if m <= n, the upper triangle of the subarray */
+/* >          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */
+/* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
+/* >          contain the M-by-N upper trapezoidal matrix R; */
+/* >          the remaining elements, with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of f2cmin(m,n) elementary */
+/* >          reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,M). */
+/* >          For optimum performance LWORK >= M*NB, where NB is */
+/* >          the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
+/* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgerqf_(integer *m, integer *n, real *a, integer *lda, 
+	real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo;
+    extern /* Subroutine */ int sgerq2_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *);
+    integer ib, nb, ki, kk, mu, nu, nx;
+    extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, 
+	    real *, integer *, real *, real *, integer *);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iws;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*lwork < f2cmax(1,*m) && ! lquery) {
+	*info = -7;
+    }
+
+    if (*info == 0) {
+	k = f2cmin(*m,*n);
+	if (k == 0) {
+	    lwkopt = 1;
+	} else {
+	    nb = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    lwkopt = *m * nb;
+	    work[1] = (real) lwkopt;
+	}
+	work[1] = (real) lwkopt;
+
+	if (*lwork < f2cmax(1,*m) && ! lquery) {
+	    *info = -7;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGERQF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (k == 0) {
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 1;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SGERQF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGERQF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially. */
+/*        The last kk rows are handled by the block method. */
+
+	ki = (k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = k, i__2 = ki + nb;
+	kk = f2cmin(i__1,i__2);
+
+	i__1 = k - kk + 1;
+	i__2 = -nb;
+	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
+		+= i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the RQ factorization of the current block */
+/*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */
+
+	    i__3 = *n - k + i__ + ib - 1;
+	    sgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+	    if (*m - k + i__ > 1) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i+ib-1) . . . H(i+1) H(i) */
+
+		i__3 = *n - k + i__ + ib - 1;
+		slarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + 
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */
+
+		i__3 = *m - k + i__ - 1;
+		i__4 = *n - k + i__ + ib - 1;
+		slarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3,
+			 &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1],
+			 &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork);
+	    }
+/* L10: */
+	}
+	mu = *m - k + i__ + nb - 1;
+	nu = *n - k + i__ + nb - 1;
+    } else {
+	mu = *m;
+	nu = *n;
+    }
+
+/*     Use unblocked code to factor the last or only block */
+
+    if (mu > 0 && nu > 0) {
+	sgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGERQF */
+
+} /* sgerqf_ */
+
diff --git a/lapack-netlib/SRC/sgesc2.c b/lapack-netlib/SRC/sgesc2.c
new file mode 100644
index 000000000..6634cde8e
--- /dev/null
+++ b/lapack-netlib/SRC/sgesc2.c
@@ -0,0 +1,604 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+
+/* > \brief \b SGESC2 solves a system of linear equations using the LU factorization with complete pivoting co
+mputed by sgetc2. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESC2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesc2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesc2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesc2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */
+
+/*       INTEGER            LDA, N */
+/*       REAL               SCALE */
+/*       INTEGER            IPIV( * ), JPIV( * ) */
+/*       REAL               A( LDA, * ), RHS( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGESC2 solves a system of linear equations */
+/* > */
+/* >           A * X = scale* RHS */
+/* > */
+/* > with a general N-by-N matrix A using the LU factorization with */
+/* > complete pivoting computed by SGETC2. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the  LU part of the factorization of the n-by-n */
+/* >          matrix A computed by SGETC2:  A = P * L * U * Q */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1, N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] RHS */
+/* > \verbatim */
+/* >          RHS is REAL array, dimension (N). */
+/* >          On entry, the right hand side vector b. */
+/* >          On exit, the solution vector X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N). */
+/* >          The pivot indices; for 1 <= i <= N, row i of the */
+/* >          matrix has been interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JPIV */
+/* > \verbatim */
+/* >          JPIV is INTEGER array, dimension (N). */
+/* >          The pivot indices; for 1 <= j <= N, column j of the */
+/* >          matrix has been interchanged with column JPIV(j). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SCALE */
+/* > \verbatim */
+/* >          SCALE is REAL */
+/* >           On exit, SCALE contains the scale factor. SCALE is chosen */
+/* >           0 <= SCALE <= 1 to prevent overflow in the solution. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEauxiliary */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
+/* >     Umea University, S-901 87 Umea, Sweden. */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesc2_(integer *n, real *a, integer *lda, real *rhs, 
+	integer *ipiv, integer *jpiv, real *scale)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    real temp;
+    integer i__, j;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
+	    slabad_(real *, real *);
+    extern real slamch_(char *);
+    real bignum;
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
+	    *, integer *, integer *, integer *);
+    real smlnum, eps;
+
+
+/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*      Set constant to control overflow */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --rhs;
+    --ipiv;
+    --jpiv;
+
+    /* Function Body */
+    eps = slamch_("P");
+    smlnum = slamch_("S") / eps;
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Apply permutations IPIV to RHS */
+
+    i__1 = *n - 1;
+    slaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1);
+
+/*     Solve for L part */
+
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = *n;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    rhs[j] -= a[j + i__ * a_dim1] * rhs[i__];
+/* L10: */
+	}
+/* L20: */
+    }
+
+/*     Solve for U part */
+
+    *scale = 1.f;
+
+/*     Check for scaling */
+
+    i__ = isamax_(n, &rhs[1], &c__1);
+    if (smlnum * 2.f * (r__1 = rhs[i__], abs(r__1)) > (r__2 = a[*n + *n * 
+	    a_dim1], abs(r__2))) {
+	temp = .5f / (r__1 = rhs[i__], abs(r__1));
+	sscal_(n, &temp, &rhs[1], &c__1);
+	*scale *= temp;
+    }
+
+    for (i__ = *n; i__ >= 1; --i__) {
+	temp = 1.f / a[i__ + i__ * a_dim1];
+	rhs[i__] *= temp;
+	i__1 = *n;
+	for (j = i__ + 1; j <= i__1; ++j) {
+	    rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp);
+/* L30: */
+	}
+/* L40: */
+    }
+
+/*     Apply permutations JPIV to the solution (RHS) */
+
+    i__1 = *n - 1;
+    slaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1);
+    return 0;
+
+/*     End of SGESC2 */
+
+} /* sgesc2_ */
+
diff --git a/lapack-netlib/SRC/sgesdd.c b/lapack-netlib/SRC/sgesdd.c
new file mode 100644
index 000000000..1429b40c1
--- /dev/null
+++ b/lapack-netlib/SRC/sgesdd.c
@@ -0,0 +1,2165 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__0 = 0;
+static real c_b63 = 0.f;
+static integer c__1 = 1;
+static real c_b84 = 1.f;
+
+/* > \brief \b SGESDD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESDD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesdd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesdd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesdd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, */
+/*                          WORK, LWORK, IWORK, INFO ) */
+
+/*       CHARACTER          JOBZ */
+/*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N */
+/*       INTEGER            IWORK( * ) */
+/*       REAL   A( LDA, * ), S( * ), U( LDU, * ), */
+/*      $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGESDD computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, optionally computing the left and right singular */
+/* > vectors.  If singular vectors are desired, it uses a */
+/* > divide-and-conquer algorithm. */
+/* > */
+/* > The SVD is written */
+/* > */
+/* >      A = U * SIGMA * transpose(V) */
+/* > */
+/* > where SIGMA is an M-by-N matrix which is zero except for its */
+/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
+/* > V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA */
+/* > are the singular values of A; they are real and non-negative, and */
+/* > are returned in descending order.  The first f2cmin(m,n) columns of */
+/* > U and V are the left and right singular vectors of A. */
+/* > */
+/* > Note that the routine returns VT = V**T, not V. */
+/* > */
+/* > The divide and conquer algorithm makes very mild assumptions about */
+/* > floating point arithmetic. It will work on machines with a guard */
+/* > digit in add/subtract, or on those binary machines without guard */
+/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
+/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
+/* > without guard digits, but we know of none. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBZ */
+/* > \verbatim */
+/* >          JOBZ is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix U: */
+/* >          = 'A':  all M columns of U and all N rows of V**T are */
+/* >                  returned in the arrays U and VT; */
+/* >          = 'S':  the first f2cmin(M,N) columns of U and the first */
+/* >                  f2cmin(M,N) rows of V**T are returned in the arrays U */
+/* >                  and VT; */
+/* >          = 'O':  If M >= N, the first N columns of U are overwritten */
+/* >                  on the array A and all rows of V**T are returned in */
+/* >                  the array VT; */
+/* >                  otherwise, all columns of U are returned in the */
+/* >                  array U and the first M rows of V**T are overwritten */
+/* >                  in the array A; */
+/* >          = 'N':  no columns of U or rows of V**T are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if JOBZ = 'O',  A is overwritten with the first N columns */
+/* >                          of U (the left singular vectors, stored */
+/* >                          columnwise) if M >= N; */
+/* >                          A is overwritten with the first M rows */
+/* >                          of V**T (the right singular vectors, stored */
+/* >                          rowwise) otherwise. */
+/* >          if JOBZ .ne. 'O', the contents of A are destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is REAL array, dimension (f2cmin(M,N)) */
+/* >          The singular values of A, sorted so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is REAL array, dimension (LDU,UCOL) */
+/* >          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; */
+/* >          UCOL = f2cmin(M,N) if JOBZ = 'S'. */
+/* >          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M */
+/* >          orthogonal matrix U; */
+/* >          if JOBZ = 'S', U contains the first f2cmin(M,N) columns of U */
+/* >          (the left singular vectors, stored columnwise); */
+/* >          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= 1; if */
+/* >          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VT */
+/* > \verbatim */
+/* >          VT is REAL array, dimension (LDVT,N) */
+/* >          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the */
+/* >          N-by-N orthogonal matrix V**T; */
+/* >          if JOBZ = 'S', VT contains the first f2cmin(M,N) rows of */
+/* >          V**T (the right singular vectors, stored rowwise); */
+/* >          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT.  LDVT >= 1; */
+/* >          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; */
+/* >          if JOBZ = 'S', LDVT >= f2cmin(M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= 1. */
+/* >          If LWORK = -1, a workspace query is assumed.  The optimal */
+/* >          size for the WORK array is calculated and stored in WORK(1), */
+/* >          and no other work except argument checking is performed. */
+/* > */
+/* >          Let mx = f2cmax(M,N) and mn = f2cmin(M,N). */
+/* >          If JOBZ = 'N', LWORK >= 3*mn + f2cmax( mx, 7*mn ). */
+/* >          If JOBZ = 'O', LWORK >= 3*mn + f2cmax( mx, 5*mn*mn + 4*mn ). */
+/* >          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn. */
+/* >          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx. */
+/* >          These are not tight minimums in all cases; see comments inside code. */
+/* >          For good performance, LWORK should generally be larger; */
+/* >          a query is recommended. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (8*f2cmin(M,N)) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  SBDSDC did not converge, updating process failed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEsing */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Ming Gu and Huan Ren, Computer Science Division, University of */
+/* >     California at Berkeley, USA */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a, 
+	integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt,
+	 real *work, integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
+	    i__2, i__3;
+
+    /* Local variables */
+    integer lwork_sgelqf_mn__, lwork_sgeqrf_mn__, iscl, lwork_sorglq_mn__, 
+	    lwork_sorglq_nn__;
+    real anrm;
+    integer idum[1], ierr, itau, lwork_sorgqr_mm__, lwork_sorgqr_mn__, 
+	    lwork_sormbr_qln_mm__, lwork_sormbr_qln_mn__, 
+	    lwork_sormbr_qln_nn__, lwork_sormbr_prt_mm__, 
+	    lwork_sormbr_prt_mn__, lwork_sormbr_prt_nn__, i__;
+    extern logical lsame_(char *, char *);
+    integer chunk;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *);
+    integer minmn, wrkbl, itaup, itauq, mnthr;
+    logical wntqa;
+    integer nwork;
+    logical wntqn, wntqo, wntqs;
+    integer ie, il, ir, bdspac, iu, lwork_sorgbr_p_mm__;
+    extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *, 
+	    real *, real *, integer *, real *, integer *, real *, integer *, 
+	    real *, integer *, integer *);
+    integer lwork_sorgbr_q_nn__;
+    extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, integer *, integer *);
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), slascl_(char *, integer 
+	    *, integer *, real *, real *, integer *, integer *, real *, 
+	    integer *, integer *), sgeqrf_(integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), slacpy_(char 
+	    *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
+	    real *, integer *);
+    extern logical sisnan_(real *);
+    extern /* Subroutine */ int sorgbr_(char *, integer *, integer *, integer 
+	    *, real *, integer *, real *, real *, integer *, integer *);
+    integer ldwrkl;
+    extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , real *, integer *, integer *);
+    integer ldwrkr, minwrk, ldwrku, maxwrk;
+    extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    integer ldwkvt;
+    real smlnum;
+    logical wntqas;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    logical lquery;
+    integer blk;
+    real dum[1], eps;
+    integer ivt, lwork_sgebrd_mm__, lwork_sgebrd_mn__, lwork_sgebrd_nn__;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    wntqa = lsame_(jobz, "A");
+    wntqs = lsame_(jobz, "S");
+    wntqas = wntqa || wntqs;
+    wntqo = lsame_(jobz, "O");
+    wntqn = lsame_(jobz, "N");
+    lquery = *lwork == -1;
+
+    if (! (wntqa || wntqs || wntqo || wntqn)) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < *
+	    m) {
+	*info = -8;
+    } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || 
+	    wntqo && *m >= *n && *ldvt < *n) {
+	*info = -10;
+    }
+
+/*     Compute workspace */
+/*       Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace allocated at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	bdspac = 0;
+	mnthr = (integer) (minmn * 11.f / 6.f);
+	if (*m >= *n && minmn > 0) {
+
+/*           Compute space needed for SBDSDC */
+
+	    if (wntqn) {
+/*              sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) */
+/*              keep 7*N for backwards compatibility. */
+		bdspac = *n * 7;
+	    } else {
+		bdspac = *n * 3 * *n + (*n << 2);
+	    }
+
+/*           Compute space preferred for each routine */
+	    sgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr);
+	    lwork_sgebrd_mn__ = (integer) dum[0];
+
+	    sgebrd_(n, n, dum, n, dum, dum, dum, dum, dum, &c_n1, &ierr);
+	    lwork_sgebrd_nn__ = (integer) dum[0];
+
+	    sgeqrf_(m, n, dum, m, dum, dum, &c_n1, &ierr);
+	    lwork_sgeqrf_mn__ = (integer) dum[0];
+
+	    sorgbr_("Q", n, n, n, dum, n, dum, dum, &c_n1, &ierr);
+	    lwork_sorgbr_q_nn__ = (integer) dum[0];
+
+	    sorgqr_(m, m, n, dum, m, dum, dum, &c_n1, &ierr);
+	    lwork_sorgqr_mm__ = (integer) dum[0];
+
+	    sorgqr_(m, n, n, dum, m, dum, dum, &c_n1, &ierr);
+	    lwork_sorgqr_mn__ = (integer) dum[0];
+
+	    sormbr_("P", "R", "T", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_prt_nn__ = (integer) dum[0];
+
+	    sormbr_("Q", "L", "N", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_qln_nn__ = (integer) dum[0];
+
+	    sormbr_("Q", "L", "N", m, n, n, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_qln_mn__ = (integer) dum[0];
+
+	    sormbr_("Q", "L", "N", m, m, n, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_qln_mm__ = (integer) dum[0];
+
+	    if (*m >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1 (M >> N, JOBZ='N') */
+
+		    wrkbl = *n + lwork_sgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = bdspac + *n;
+		} else if (wntqo) {
+
+/*                 Path 2 (M >> N, JOBZ='O') */
+
+		    wrkbl = *n + lwork_sgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + lwork_sorgqr_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + (*n << 1) * *n;
+		    minwrk = bdspac + (*n << 1) * *n + *n * 3;
+		} else if (wntqs) {
+
+/*                 Path 3 (M >> N, JOBZ='S') */
+
+		    wrkbl = *n + lwork_sgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + lwork_sorgqr_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+		    minwrk = bdspac + *n * *n + *n * 3;
+		} else if (wntqa) {
+
+/*                 Path 4 (M >> N, JOBZ='A') */
+
+		    wrkbl = *n + lwork_sgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + lwork_sorgqr_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+/* Computing MAX */
+		    i__1 = *n * 3 + bdspac, i__2 = *n + *m;
+		    minwrk = *n * *n + f2cmax(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 5 (M >= N, but not much larger) */
+
+		wrkbl = *n * 3 + lwork_sgebrd_mn__;
+		if (wntqn) {
+/*                 Path 5n (M >= N, jobz='N') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *n * 3 + f2cmax(*m,bdspac);
+		} else if (wntqo) {
+/*                 Path 5o (M >= N, jobz='O') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *m, i__2 = *n * *n + bdspac;
+		    minwrk = *n * 3 + f2cmax(i__1,i__2);
+		} else if (wntqs) {
+/*                 Path 5s (M >= N, jobz='S') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *n * 3 + f2cmax(*m,bdspac);
+		} else if (wntqa) {
+/*                 Path 5a (M >= N, jobz='A') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *n * 3 + f2cmax(*m,bdspac);
+		}
+	    }
+	} else if (minmn > 0) {
+
+/*           Compute space needed for SBDSDC */
+
+	    if (wntqn) {
+/*              sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) */
+/*              keep 7*N for backwards compatibility. */
+		bdspac = *m * 7;
+	    } else {
+		bdspac = *m * 3 * *m + (*m << 2);
+	    }
+
+/*           Compute space preferred for each routine */
+	    sgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr);
+	    lwork_sgebrd_mn__ = (integer) dum[0];
+
+	    sgebrd_(m, m, &a[a_offset], m, &s[1], dum, dum, dum, dum, &c_n1, &
+		    ierr);
+	    lwork_sgebrd_mm__ = (integer) dum[0];
+
+	    sgelqf_(m, n, &a[a_offset], m, dum, dum, &c_n1, &ierr);
+	    lwork_sgelqf_mn__ = (integer) dum[0];
+
+	    sorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr);
+	    lwork_sorglq_nn__ = (integer) dum[0];
+
+	    sorglq_(m, n, m, &a[a_offset], m, dum, dum, &c_n1, &ierr);
+	    lwork_sorglq_mn__ = (integer) dum[0];
+
+	    sorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
+	    lwork_sorgbr_p_mm__ = (integer) dum[0];
+
+	    sormbr_("P", "R", "T", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_prt_mm__ = (integer) dum[0];
+
+	    sormbr_("P", "R", "T", m, n, m, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_prt_mn__ = (integer) dum[0];
+
+	    sormbr_("P", "R", "T", n, n, m, dum, n, dum, dum, n, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_prt_nn__ = (integer) dum[0];
+
+	    sormbr_("Q", "L", "N", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_sormbr_qln_mm__ = (integer) dum[0];
+
+	    if (*n >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1t (N >> M, JOBZ='N') */
+
+		    wrkbl = *m + lwork_sgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = bdspac + *m;
+		} else if (wntqo) {
+
+/*                 Path 2t (N >> M, JOBZ='O') */
+
+		    wrkbl = *m + lwork_sgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + lwork_sorglq_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + (*m << 1) * *m;
+		    minwrk = bdspac + (*m << 1) * *m + *m * 3;
+		} else if (wntqs) {
+
+/*                 Path 3t (N >> M, JOBZ='S') */
+
+		    wrkbl = *m + lwork_sgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + lwork_sorglq_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+		    minwrk = bdspac + *m * *m + *m * 3;
+		} else if (wntqa) {
+
+/*                 Path 4t (N >> M, JOBZ='A') */
+
+		    wrkbl = *m + lwork_sgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + lwork_sorglq_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+/* Computing MAX */
+		    i__1 = *m * 3 + bdspac, i__2 = *m + *n;
+		    minwrk = *m * *m + f2cmax(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 5t (N > M, but not much larger) */
+
+		wrkbl = *m * 3 + lwork_sgebrd_mn__;
+		if (wntqn) {
+/*                 Path 5tn (N > M, jobz='N') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *m * 3 + f2cmax(*n,bdspac);
+		} else if (wntqo) {
+/*                 Path 5to (N > M, jobz='O') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *n, i__2 = *m * *m + bdspac;
+		    minwrk = *m * 3 + f2cmax(i__1,i__2);
+		} else if (wntqs) {
+/*                 Path 5ts (N > M, jobz='S') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *m * 3 + f2cmax(*n,bdspac);
+		} else if (wntqa) {
+/*                 Path 5ta (N > M, jobz='A') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *m * 3 + f2cmax(*n,bdspac);
+		}
+	    }
+	}
+	maxwrk = f2cmax(maxwrk,minwrk);
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESDD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = sqrt(slamch_("S")) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, dum);
+    if (sisnan_(&anrm)) {
+	*info = -4;
+	return 0;
+    }
+    iscl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*        A has at least as many rows as columns. If A has sufficiently */
+/*        more rows than columns, first reduce using the QR */
+/*        decomposition (if sufficient workspace available) */
+
+	if (*m >= mnthr) {
+
+	    if (wntqn) {
+
+/*              Path 1 (M >> N, JOBZ='N') */
+/*              No singular vectors to be computed */
+
+		itau = 1;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              Workspace: need   N [tau] + N    [work] */
+/*              Workspace: prefer N [tau] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out below R */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		slaset_("L", &i__1, &i__2, &c_b63, &c_b63, &a[a_dim1 + 2], 
+			lda);
+		ie = 1;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in A */
+/*              Workspace: need   3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *n;
+
+/*              Perform bidiagonal SVD, computing singular values only */
+/*              Workspace: need   N [e] + BDSPAC */
+
+		sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*              Path 2 (M >> N, JOBZ = 'O') */
+/*              N left singular vectors to be overwritten on A and */
+/*              N right singular vectors to be computed in VT */
+
+		ir = 1;
+
+/*              WORK(IR) is LDWRKR by N */
+
+		if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) {
+		    ldwrkr = *lda;
+		} else {
+		    ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n;
+		}
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		slaset_("L", &i__1, &i__2, &c_b63, &c_b63, &work[ir + 1], &
+			ldwrkr);
+
+/*              Generate Q in A */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in WORK(IR) */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*              WORK(IU) is N by N */
+
+		iu = nwork;
+		nwork = iu + *n * *n;
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in WORK(IU) and computing right */
+/*              singular vectors of bidiagonal matrix in VT */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC */
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite WORK(IU) by left singular vectors of R */
+/*              and VT by right singular vectors of R */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N    [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &work[iu], n, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*              Multiply Q in A by left singular vectors of R in */
+/*              WORK(IU), storing result in WORK(IR) and copying to A */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] */
+/*              Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U] */
+
+		i__1 = *m;
+		i__2 = ldwrkr;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
+			i__2) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = f2cmin(i__3,ldwrkr);
+		    sgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], 
+			    lda, &work[iu], n, &c_b63, &work[ir], &ldwrkr);
+		    slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + 
+			    a_dim1], lda);
+/* L10: */
+		}
+
+	    } else if (wntqs) {
+
+/*              Path 3 (M >> N, JOBZ='S') */
+/*              N left singular vectors to be computed in U and */
+/*              N right singular vectors to be computed in VT */
+
+		ir = 1;
+
+/*              WORK(IR) is N by N */
+
+		ldwrkr = *n;
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		slaset_("L", &i__2, &i__1, &c_b63, &c_b63, &work[ir + 1], &
+			ldwrkr);
+
+/*              Generate Q in A */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in WORK(IR) */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagoal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + BDSPAC */
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of R and VT */
+/*              by right singular vectors of R */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply Q in A by left singular vectors of R in */
+/*              WORK(IR), storing result in U */
+/*              Workspace: need   N*N [R] */
+
+		slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr);
+		sgemm_("N", "N", m, n, n, &c_b84, &a[a_offset], lda, &work[ir]
+			, &ldwrkr, &c_b63, &u[u_offset], ldu);
+
+	    } else if (wntqa) {
+
+/*              Path 4 (M >> N, JOBZ='A') */
+/*              M left singular vectors to be computed in U and */
+/*              N right singular vectors to be computed in VT */
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		itau = iu + ldwrku * *n;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R, copying result to U */
+/*              Workspace: need   N*N [U] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [U] + N [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*              Generate Q in U */
+/*              Workspace: need   N*N [U] + N [tau] + M    [work] */
+/*              Workspace: prefer N*N [U] + N [tau] + M*NB [work] */
+		i__2 = *lwork - nwork + 1;
+		sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+
+/*              Produce R in A, zeroing out other entries */
+
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		slaset_("L", &i__2, &i__1, &c_b63, &c_b63, &a[a_dim1 + 2], 
+			lda);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in A */
+/*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in WORK(IU) and computing right */
+/*              singular vectors of bidiagonal matrix in VT */
+/*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + BDSPAC */
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite WORK(IU) by left singular vectors of R and VT */
+/*              by right singular vectors of R */
+/*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			ierr);
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply Q in U by left singular vectors of R in */
+/*              WORK(IU), storing result in A */
+/*              Workspace: need   N*N [U] */
+
+		sgemm_("N", "N", m, n, n, &c_b84, &u[u_offset], ldu, &work[iu]
+			, &ldwrku, &c_b63, &a[a_offset], lda);
+
+/*              Copy left singular vectors of A from A to U */
+
+		slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+	    }
+
+	} else {
+
+/*           M .LT. MNTHR */
+
+/*           Path 5 (M >= N, but not much larger) */
+/*           Reduce to bidiagonal form without QR decomposition */
+
+	    ie = 1;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*           Bidiagonalize A */
+/*           Workspace: need   3*N [e, tauq, taup] + M        [work] */
+/*           Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work] */
+
+	    i__2 = *lwork - nwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*              Path 5n (M >= N, JOBZ='N') */
+/*              Perform bidiagonal SVD, only computing singular values */
+/*              Workspace: need   3*N [e, tauq, taup] + BDSPAC */
+
+		sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+/*              Path 5o (M >= N, JOBZ='O') */
+		iu = nwork;
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		    nwork = iu + ldwrku * *n;
+		    slaset_("F", m, n, &c_b63, &c_b63, &work[iu], &ldwrku);
+/*                 IR is unused; silence compile warnings */
+		    ir = -1;
+		} else {
+
+/*                 WORK( IU ) is N by N */
+
+		    ldwrku = *n;
+		    nwork = iu + ldwrku * *n;
+
+/*                 WORK(IR) is LDWRKR by N */
+
+		    ir = nwork;
+		    ldwrkr = (*lwork - *n * *n - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in WORK(IU) and computing right */
+/*              singular vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + BDSPAC */
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, &
+			vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[
+			1], info);
+
+/*              Overwrite VT by right singular vectors of A */
+/*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*                 Path 5o-fast */
+/*                 Overwrite WORK(IU) by left singular vectors of A */
+/*                 Workspace: need   3*N [e, tauq, taup] + M*N [U] + N    [work] */
+/*                 Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			    itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			    ierr);
+
+/*                 Copy left singular vectors of A from WORK(IU) to A */
+
+		    slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda);
+		} else {
+
+/*                 Path 5o-slow */
+/*                 Generate Q in A */
+/*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work] */
+/*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[nwork], &i__2, &ierr);
+
+/*                 Multiply Q in A by left singular vectors of */
+/*                 bidiagonal matrix in WORK(IU), storing result in */
+/*                 WORK(IR) and copying to A */
+/*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + NB*N [R] */
+/*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N  [R] */
+
+		    i__2 = *m;
+		    i__1 = ldwrkr;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *m - i__ + 1;
+			chunk = f2cmin(i__3,ldwrkr);
+			sgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + 
+				a_dim1], lda, &work[iu], &ldwrku, &c_b63, &
+				work[ir], &ldwrkr);
+			slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + 
+				a_dim1], lda);
+/* L20: */
+		    }
+		}
+
+	    } else if (wntqs) {
+
+/*              Path 5s (M >= N, JOBZ='S') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*N [e, tauq, taup] + BDSPAC */
+
+		slaset_("F", m, n, &c_b63, &c_b63, &u[u_offset], ldu);
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*N [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*              Path 5a (M >= N, JOBZ='A') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*N [e, tauq, taup] + BDSPAC */
+
+		slaset_("F", m, m, &c_b63, &c_b63, &u[u_offset], ldu);
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Set the right corner of U to identity matrix */
+
+		if (*m > *n) {
+		    i__1 = *m - *n;
+		    i__2 = *m - *n;
+		    slaset_("F", &i__1, &i__2, &c_b63, &c_b84, &u[*n + 1 + (*
+			    n + 1) * u_dim1], ldu);
+		}
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*N [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    } else {
+
+/*        A has more columns than rows. If A has sufficiently more */
+/*        columns than rows, first reduce using the LQ decomposition (if */
+/*        sufficient workspace available) */
+
+	if (*n >= mnthr) {
+
+	    if (wntqn) {
+
+/*              Path 1t (N >> M, JOBZ='N') */
+/*              No singular vectors to be computed */
+
+		itau = 1;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              Workspace: need   M [tau] + M [work] */
+/*              Workspace: prefer M [tau] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out above L */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		slaset_("U", &i__1, &i__2, &c_b63, &c_b63, &a[(a_dim1 << 1) + 
+			1], lda);
+		ie = 1;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in A */
+/*              Workspace: need   3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *m;
+
+/*              Perform bidiagonal SVD, computing singular values only */
+/*              Workspace: need   M [e] + BDSPAC */
+
+		sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*              Path 2t (N >> M, JOBZ='O') */
+/*              M right singular vectors to be overwritten on A and */
+/*              M left singular vectors to be computed in U */
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+/*              WORK(IL)  is M by M; it is later resized to M by chunk for gemm */
+
+		il = ivt + *m * *m;
+		if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) {
+		    ldwrkl = *m;
+		    chunk = *n;
+		} else {
+		    ldwrkl = *m;
+		    chunk = (*lwork - *m * *m) / *m;
+		}
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy L to WORK(IL), zeroing about above it */
+
+		slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		slaset_("U", &i__1, &i__2, &c_b63, &c_b63, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*              Generate Q in A */
+/*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in WORK(IL) */
+/*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U, and computing right singular */
+/*              vectors of bidiagonal matrix in WORK(IVT) */
+/*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC */
+
+		sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], m, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of L and WORK(IVT) */
+/*              by right singular vectors of L */
+/*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &work[ivt], m, &work[nwork], &i__1, &ierr);
+
+/*              Multiply right singular vectors of L in WORK(IVT) by Q */
+/*              in A, storing result in WORK(IL) and copying to A */
+/*              Workspace: need   M*M [VT] + M*M [L] */
+/*              Workspace: prefer M*M [VT] + M*N [L] */
+/*              At this point, L is resized as M by chunk. */
+
+		i__1 = *n;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
+			i__2) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = f2cmin(i__3,chunk);
+		    sgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], m, &a[
+			    i__ * a_dim1 + 1], lda, &c_b63, &work[il], &
+			    ldwrkl);
+		    slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 
+			    + 1], lda);
+/* L30: */
+		}
+
+	    } else if (wntqs) {
+
+/*              Path 3t (N >> M, JOBZ='S') */
+/*              M right singular vectors to be computed in VT and */
+/*              M left singular vectors to be computed in U */
+
+		il = 1;
+
+/*              WORK(IL) is M by M */
+
+		ldwrkl = *m;
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              Workspace: need   M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [L] + M [tau] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy L to WORK(IL), zeroing out above it */
+
+		slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		slaset_("U", &i__2, &i__1, &c_b63, &c_b63, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*              Generate Q in A */
+/*              Workspace: need   M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [L] + M [tau] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in WORK(IU). */
+/*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + BDSPAC */
+
+		sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of L and VT */
+/*              by right singular vectors of L */
+/*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply right singular vectors of L in WORK(IL) by */
+/*              Q in A, storing result in VT */
+/*              Workspace: need   M*M [L] */
+
+		slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl);
+		sgemm_("N", "N", m, n, m, &c_b84, &work[il], &ldwrkl, &a[
+			a_offset], lda, &c_b63, &vt[vt_offset], ldvt);
+
+	    } else if (wntqa) {
+
+/*              Path 4t (N >> M, JOBZ='A') */
+/*              N right singular vectors to be computed in VT and */
+/*              M left singular vectors to be computed in U */
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+
+		ldwkvt = *m;
+		itau = ivt + ldwkvt * *m;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q, copying result to VT */
+/*              Workspace: need   M*M [VT] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M [tau] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*              Generate Q in VT */
+/*              Workspace: need   M*M [VT] + M [tau] + N    [work] */
+/*              Workspace: prefer M*M [VT] + M [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[
+			nwork], &i__2, &ierr);
+
+/*              Produce L in A, zeroing out other entries */
+
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		slaset_("U", &i__2, &i__1, &c_b63, &c_b63, &a[(a_dim1 << 1) + 
+			1], lda);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in A */
+/*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in WORK(IVT) */
+/*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + BDSPAC */
+
+		sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*              Overwrite U by left singular vectors of L and WORK(IVT) */
+/*              by right singular vectors of L */
+/*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup]+ M    [work] */
+/*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply right singular vectors of L in WORK(IVT) by */
+/*              Q in VT, storing result in A */
+/*              Workspace: need   M*M [VT] */
+
+		sgemm_("N", "N", m, n, m, &c_b84, &work[ivt], &ldwkvt, &vt[
+			vt_offset], ldvt, &c_b63, &a[a_offset], lda);
+
+/*              Copy right singular vectors of A from A to VT */
+
+		slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+	    }
+
+	} else {
+
+/*           N .LT. MNTHR */
+
+/*           Path 5t (N > M, but not much larger) */
+/*           Reduce to bidiagonal form without LQ decomposition */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*           Bidiagonalize A */
+/*           Workspace: need   3*M [e, tauq, taup] + N        [work] */
+/*           Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work] */
+
+	    i__2 = *lwork - nwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*              Path 5tn (N > M, JOBZ='N') */
+/*              Perform bidiagonal SVD, only computing singular values */
+/*              Workspace: need   3*M [e, tauq, taup] + BDSPAC */
+
+		sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+/*              Path 5to (N > M, JOBZ='O') */
+		ldwkvt = *m;
+		ivt = nwork;
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    slaset_("F", m, n, &c_b63, &c_b63, &work[ivt], &ldwkvt);
+		    nwork = ivt + ldwkvt * *n;
+/*                 IL is unused; silence compile warnings */
+		    il = -1;
+		} else {
+
+/*                 WORK( IVT ) is M by M */
+
+		    nwork = ivt + ldwkvt * *m;
+		    il = nwork;
+
+/*                 WORK(IL) is M by CHUNK */
+
+		    chunk = (*lwork - *m * *m - *m * 3) / *m;
+		}
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in WORK(IVT) */
+/*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + BDSPAC */
+
+		sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*              Overwrite U by left singular vectors of A */
+/*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*                 Path 5to-fast */
+/*                 Overwrite WORK(IVT) by left singular vectors of A */
+/*                 Workspace: need   3*M [e, tauq, taup] + M*N [VT] + M    [work] */
+/*                 Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			    itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, 
+			    &ierr);
+
+/*                 Copy right singular vectors of A from WORK(IVT) to A */
+
+		    slacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda);
+		} else {
+
+/*                 Path 5to-slow */
+/*                 Generate P**T in A */
+/*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work] */
+/*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[nwork], &i__2, &ierr);
+
+/*                 Multiply Q in A by right singular vectors of */
+/*                 bidiagonal matrix in WORK(IVT), storing result in */
+/*                 WORK(IL) and copying to A */
+/*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M*NB [L] */
+/*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N  [L] */
+
+		    i__2 = *n;
+		    i__1 = chunk;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *n - i__ + 1;
+			blk = f2cmin(i__3,chunk);
+			sgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], &
+				ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b63, &
+				work[il], m);
+			slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + 
+				1], lda);
+/* L40: */
+		    }
+		}
+	    } else if (wntqs) {
+
+/*              Path 5ts (N > M, JOBZ='S') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*M [e, tauq, taup] + BDSPAC */
+
+		slaset_("F", m, n, &c_b63, &c_b63, &vt[vt_offset], ldvt);
+		sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*M [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*              Path 5ta (N > M, JOBZ='A') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*M [e, tauq, taup] + BDSPAC */
+
+		slaset_("F", n, n, &c_b63, &c_b63, &vt[vt_offset], ldvt);
+		sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Set the right corner of VT to identity matrix */
+
+		if (*n > *m) {
+		    i__1 = *n - *m;
+		    i__2 = *n - *m;
+		    slaset_("F", &i__1, &i__2, &c_b63, &c_b84, &vt[*m + 1 + (*
+			    m + 1) * vt_dim1], ldvt);
+		}
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*M [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (real) maxwrk;
+
+    return 0;
+
+/*     End of SGESDD */
+
+} /* sgesdd_ */
+
diff --git a/lapack-netlib/SRC/sgesv.c b/lapack-netlib/SRC/sgesv.c
new file mode 100644
index 000000000..694ba139e
--- /dev/null
+++ b/lapack-netlib/SRC/sgesv.c
@@ -0,0 +1,576 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> SGESV computes the solution to system of linear equations A * X = B for GE matrices</b> (simpl
+e driver) */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesv.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesv.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesv.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               A( LDA, * ), B( LDB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGESV computes the solution to a real system of linear equations */
+/* >    A * X = B, */
+/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */
+/* > */
+/* > The LU decomposition with partial pivoting and row interchanges is */
+/* > used to factor A as */
+/* >    A = P * L * U, */
+/* > where P is a permutation matrix, L is unit lower triangular, and U is */
+/* > upper triangular.  The factored form of A is then used to solve the */
+/* > system of equations A * X = B. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of linear equations, i.e., the order of the */
+/* >          matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the N-by-N coefficient matrix A. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices that define the permutation matrix P; */
+/* >          row i of the matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the N-by-NRHS matrix of right hand side matrix B. */
+/* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, so the solution could not be computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, 
+	integer *ipiv, real *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgetrf_(
+	    integer *, integer *, real *, integer *, integer *, integer *), 
+	    sgetrs_(char *, integer *, integer *, real *, integer *, integer *
+	    , real *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*nrhs < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESV ", &i__1, (ftnlen)5);
+	return 0;
+    }
+
+/*     Compute the LU factorization of A. */
+
+    sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
+    if (*info == 0) {
+
+/*        Solve the system A*X = B, overwriting B with X. */
+
+	sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[
+		b_offset], ldb, info);
+    }
+    return 0;
+
+/*     End of SGESV */
+
+} /* sgesv_ */
+
diff --git a/lapack-netlib/SRC/sgesvd.c b/lapack-netlib/SRC/sgesvd.c
new file mode 100644
index 000000000..33df4bf82
--- /dev/null
+++ b/lapack-netlib/SRC/sgesvd.c
@@ -0,0 +1,4471 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__6 = 6;
+static integer c__0 = 0;
+static integer c__2 = 2;
+static integer c_n1 = -1;
+static real c_b57 = 0.f;
+static integer c__1 = 1;
+static real c_b79 = 1.f;
+
+/* > \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESVD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */
+/*                          WORK, LWORK, INFO ) */
+
+/*       CHARACTER          JOBU, JOBVT */
+/*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N */
+/*       REAL               A( LDA, * ), S( * ), U( LDU, * ), */
+/*      $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGESVD computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, optionally computing the left and/or right singular */
+/* > vectors. The SVD is written */
+/* > */
+/* >      A = U * SIGMA * transpose(V) */
+/* > */
+/* > where SIGMA is an M-by-N matrix which is zero except for its */
+/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
+/* > V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA */
+/* > are the singular values of A; they are real and non-negative, and */
+/* > are returned in descending order.  The first f2cmin(m,n) columns of */
+/* > U and V are the left and right singular vectors of A. */
+/* > */
+/* > Note that the routine returns V**T, not V. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix U: */
+/* >          = 'A':  all M columns of U are returned in array U: */
+/* >          = 'S':  the first f2cmin(m,n) columns of U (the left singular */
+/* >                  vectors) are returned in the array U; */
+/* >          = 'O':  the first f2cmin(m,n) columns of U (the left singular */
+/* >                  vectors) are overwritten on the array A; */
+/* >          = 'N':  no columns of U (no left singular vectors) are */
+/* >                  computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVT */
+/* > \verbatim */
+/* >          JOBVT is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix */
+/* >          V**T: */
+/* >          = 'A':  all N rows of V**T are returned in the array VT; */
+/* >          = 'S':  the first f2cmin(m,n) rows of V**T (the right singular */
+/* >                  vectors) are returned in the array VT; */
+/* >          = 'O':  the first f2cmin(m,n) rows of V**T (the right singular */
+/* >                  vectors) are overwritten on the array A; */
+/* >          = 'N':  no rows of V**T (no right singular vectors) are */
+/* >                  computed. */
+/* > */
+/* >          JOBVT and JOBU cannot both be 'O'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if JOBU = 'O',  A is overwritten with the first f2cmin(m,n) */
+/* >                          columns of U (the left singular vectors, */
+/* >                          stored columnwise); */
+/* >          if JOBVT = 'O', A is overwritten with the first f2cmin(m,n) */
+/* >                          rows of V**T (the right singular vectors, */
+/* >                          stored rowwise); */
+/* >          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */
+/* >                          are destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is REAL array, dimension (f2cmin(M,N)) */
+/* >          The singular values of A, sorted so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is REAL array, dimension (LDU,UCOL) */
+/* >          (LDU,M) if JOBU = 'A' or (LDU,f2cmin(M,N)) if JOBU = 'S'. */
+/* >          If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */
+/* >          if JOBU = 'S', U contains the first f2cmin(m,n) columns of U */
+/* >          (the left singular vectors, stored columnwise); */
+/* >          if JOBU = 'N' or 'O', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= 1; if */
+/* >          JOBU = 'S' or 'A', LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VT */
+/* > \verbatim */
+/* >          VT is REAL array, dimension (LDVT,N) */
+/* >          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */
+/* >          V**T; */
+/* >          if JOBVT = 'S', VT contains the first f2cmin(m,n) rows of */
+/* >          V**T (the right singular vectors, stored rowwise); */
+/* >          if JOBVT = 'N' or 'O', VT is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT.  LDVT >= 1; if */
+/* >          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= f2cmin(M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */
+/* >          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */
+/* >          superdiagonal elements of an upper bidiagonal matrix B */
+/* >          whose diagonal is in S (not necessarily sorted). B */
+/* >          satisfies A = U * B * VT, so it has the same singular values */
+/* >          as A, and singular vectors related by U and VT. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): */
+/* >             - PATH 1  (M much larger than N, JOBU='N') */
+/* >             - PATH 1t (N much larger than M, JOBVT='N') */
+/* >          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if SBDSQR did not converge, INFO specifies how many */
+/* >                superdiagonals of an intermediate bidiagonal form B */
+/* >                did not converge to zero. See the description of WORK */
+/* >                above for details. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup realGEsing */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, 
+	real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, 
+	integer *ldvt, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 
+	    i__2, i__3, i__4;
+    char ch__1[2];
+
+    /* Local variables */
+    integer iscl;
+    real anrm;
+    integer ierr, itau, ncvt, nrvt, lwork_sgebrd__, lwork_sgelqf__, 
+	    lwork_sgeqrf__, i__;
+    extern logical lsame_(char *, char *);
+    integer chunk;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *);
+    integer minmn, wrkbl, itaup, itauq, mnthr, iwork;
+    logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs;
+    integer ie, ir, bdspac, iu;
+    extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, integer *, integer *);
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), slascl_(char *, integer 
+	    *, integer *, real *, real *, integer *, integer *, real *, 
+	    integer *, integer *), sgeqrf_(integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *), slacpy_(char 
+	    *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
+	    real *, integer *), sbdsqr_(char *, integer *, integer *, 
+	    integer *, integer *, real *, real *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *), sorgbr_(
+	    char *, integer *, integer *, integer *, real *, integer *, real *
+	    , real *, integer *, integer *), sormbr_(char *, char *, 
+	    char *, integer *, integer *, integer *, real *, integer *, real *
+	    , real *, integer *, real *, integer *, integer *);
+    integer ldwrkr, minwrk, ldwrku, maxwrk;
+    extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    real smlnum;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    logical lquery, wntuas, wntvas;
+    integer blk, lwork_sorgbr_p__, lwork_sorgbr_q__, lwork_sorglq_m__, 
+	    lwork_sorglq_n__, ncu, lwork_sorgqr_n__, lwork_sorgqr_m__;
+    real eps, dum[1];
+    integer nru;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    wntua = lsame_(jobu, "A");
+    wntus = lsame_(jobu, "S");
+    wntuas = wntua || wntus;
+    wntuo = lsame_(jobu, "O");
+    wntun = lsame_(jobu, "N");
+    wntva = lsame_(jobvt, "A");
+    wntvs = lsame_(jobvt, "S");
+    wntvas = wntva || wntvs;
+    wntvo = lsame_(jobvt, "O");
+    wntvn = lsame_(jobvt, "N");
+    lquery = *lwork == -1;
+
+    if (! (wntua || wntus || wntuo || wntun)) {
+	*info = -1;
+    } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -6;
+    } else if (*ldu < 1 || wntuas && *ldu < *m) {
+	*info = -9;
+    } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) {
+	*info = -11;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	if (*m >= *n && minmn > 0) {
+
+/*           Compute space needed for SBDSQR */
+
+/* Writing concatenation */
+	    i__1[0] = 1, a__1[0] = jobu;
+	    i__1[1] = 1, a__1[1] = jobvt;
+	    s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+	    mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
+		    ftnlen)6, (ftnlen)2);
+	    bdspac = *n * 5;
+/*           Compute space needed for SGEQRF */
+	    sgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_sgeqrf__ = (integer) dum[0];
+/*           Compute space needed for SORGQR */
+	    sorgqr_(m, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_sorgqr_n__ = (integer) dum[0];
+	    sorgqr_(m, m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_sorgqr_m__ = (integer) dum[0];
+/*           Compute space needed for SGEBRD */
+	    sgebrd_(n, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
+		     &ierr);
+	    lwork_sgebrd__ = (integer) dum[0];
+/*           Compute space needed for SORGBR P */
+	    sorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_sorgbr_p__ = (integer) dum[0];
+/*           Compute space needed for SORGBR Q */
+	    sorgbr_("Q", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_sorgbr_q__ = (integer) dum[0];
+
+	    if (*m >= mnthr) {
+		if (wntun) {
+
+/*                 Path 1 (M much larger than N, JOBU='N') */
+
+		    maxwrk = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_sgebrd__;
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wntvo || wntvas) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__;
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntuo && wntvn) {
+
+/*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntuo && wntvas) {
+
+/*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */
+/*                 'A') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntus && wntvn) {
+
+/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntus && wntvo) {
+
+/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*n << 1) * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntus && wntvas) {
+
+/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */
+/*                 'A') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntua && wntvn) {
+
+/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntua && wntvo) {
+
+/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*n << 1) * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntua && wntvas) {
+
+/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */
+/*                 'A') */
+
+		    wrkbl = *n + lwork_sgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		}
+	    } else {
+
+/*              Path 10 (M at least N, but not much larger) */
+
+		sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
+			c_n1, &ierr);
+		lwork_sgebrd__ = (integer) dum[0];
+		maxwrk = *n * 3 + lwork_sgebrd__;
+		if (wntus || wntuo) {
+		    sorgbr_("Q", m, n, n, &a[a_offset], lda, dum, dum, &c_n1, 
+			    &ierr);
+		    lwork_sorgbr_q__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (wntua) {
+		    sorgbr_("Q", m, m, n, &a[a_offset], lda, dum, dum, &c_n1, 
+			    &ierr);
+		    lwork_sorgbr_q__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (! wntvn) {
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		i__2 = *n * 3 + *m;
+		minwrk = f2cmax(i__2,bdspac);
+	    }
+	} else if (minmn > 0) {
+
+/*           Compute space needed for SBDSQR */
+
+/* Writing concatenation */
+	    i__1[0] = 1, a__1[0] = jobu;
+	    i__1[1] = 1, a__1[1] = jobvt;
+	    s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+	    mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
+		    ftnlen)6, (ftnlen)2);
+	    bdspac = *m * 5;
+/*           Compute space needed for SGELQF */
+	    sgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_sgelqf__ = (integer) dum[0];
+/*           Compute space needed for SORGLQ */
+	    sorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr);
+	    lwork_sorglq_n__ = (integer) dum[0];
+	    sorglq_(m, n, m, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_sorglq_m__ = (integer) dum[0];
+/*           Compute space needed for SGEBRD */
+	    sgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
+		     &ierr);
+	    lwork_sgebrd__ = (integer) dum[0];
+/*            Compute space needed for SORGBR P */
+	    sorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
+	    lwork_sorgbr_p__ = (integer) dum[0];
+/*           Compute space needed for SORGBR Q */
+	    sorgbr_("Q", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
+	    lwork_sorgbr_q__ = (integer) dum[0];
+	    if (*n >= mnthr) {
+		if (wntvn) {
+
+/*                 Path 1t(N much larger than M, JOBVT='N') */
+
+		    maxwrk = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_sgebrd__;
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wntuo || wntuas) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__;
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvo && wntun) {
+
+/*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvo && wntuas) {
+
+/*                 Path 3t(N much larger than M, JOBU='S' or 'A', */
+/*                 JOBVT='O') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvs && wntun) {
+
+/*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvs && wntuo) {
+
+/*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*m << 1) * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		    maxwrk = f2cmax(maxwrk,minwrk);
+		} else if (wntvs && wntuas) {
+
+/*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
+/*                 JOBVT='S') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntva && wntun) {
+
+/*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntva && wntuo) {
+
+/*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*m << 1) * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntva && wntuas) {
+
+/*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
+/*                 JOBVT='A') */
+
+		    wrkbl = *m + lwork_sgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		}
+	    } else {
+
+/*              Path 10t(N greater than M, but not much larger) */
+
+		sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
+			c_n1, &ierr);
+		lwork_sgebrd__ = (integer) dum[0];
+		maxwrk = *m * 3 + lwork_sgebrd__;
+		if (wntvs || wntvo) {
+/*                Compute space needed for SORGBR P */
+		    sorgbr_("P", m, n, m, &a[a_offset], n, dum, dum, &c_n1, &
+			    ierr);
+		    lwork_sorgbr_p__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (wntva) {
+		    sorgbr_("P", n, n, m, &a[a_offset], n, dum, dum, &c_n1, &
+			    ierr);
+		    lwork_sorgbr_p__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (! wntun) {
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		i__2 = *m * 3 + *n;
+		minwrk = f2cmax(i__2,bdspac);
+	    }
+	}
+	maxwrk = f2cmax(maxwrk,minwrk);
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("SGESVD", &i__2, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = sqrt(slamch_("S")) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*        A has at least as many rows as columns. If A has sufficiently */
+/*        more rows than columns, first reduce using the QR */
+/*        decomposition (if sufficient workspace available) */
+
+	if (*m >= mnthr) {
+
+	    if (wntun) {
+
+/*              Path 1 (M much larger than N, JOBU='N') */
+/*              No left singular vectors to be computed */
+
+		itau = 1;
+		iwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              (Workspace: need 2*N, prefer N+N*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
+			i__2, &ierr);
+
+/*              Zero out below R */
+
+		if (*n > 1) {
+		    i__2 = *n - 1;
+		    i__3 = *n - 1;
+		    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[a_dim1 + 2],
+			     lda);
+		}
+		ie = 1;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		iwork = itaup + *n;
+
+/*              Bidiagonalize R in A */
+/*              (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+		ncvt = 0;
+		if (wntvo || wntvas) {
+
+/*                 If right singular vectors desired, generate P'. */
+/*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &
+			    work[iwork], &i__2, &ierr);
+		    ncvt = *n;
+		}
+		iwork = ie + *n;
+
+/*              Perform bidiagonal QR iteration, computing right */
+/*              singular vectors of A in A if desired */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[
+			a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], 
+			info);
+
+/*              If right singular vectors desired in VT, copy them there */
+
+		if (wntvas) {
+		    slacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], 
+			    ldvt);
+		}
+
+	    } else if (wntuo && wntvn) {
+
+/*              Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
+/*              N left singular vectors to be overwritten on A and */
+/*              no right singular vectors to be computed */
+
+/* Computing MAX */
+		i__2 = *n << 2;
+		if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *lda * *n + *n;
+		    if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) {
+
+/*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N */
+
+			ldwrku = *lda;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__2 = wrkbl, i__3 = *lda * *n + *n;
+			if (*lwork >= f2cmax(i__2,i__3) + *n * *n) {
+
+/*                    WORK(IU) is LDA by N, WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ldwrkr = *n;
+			} else {
+
+/*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N */
+
+			    ldwrku = (*lwork - *n * *n - *n) / *n;
+			    ldwrkr = *n;
+			}
+		    }
+		    itau = ir + ldwrkr * *n;
+		    iwork = itau + *n;
+
+/*                 Compute A=Q*R */
+/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy R to WORK(IR) and zero out below it */
+
+		    slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		    i__2 = *n - 1;
+		    i__3 = *n - 1;
+		    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 1], 
+			    &ldwrkr);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize R in WORK(IR) */
+/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+
+/*                 Generate left vectors bidiagonalizing R */
+/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
+			    work[iwork], &i__2, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of R in WORK(IR) */
+/*                 (Workspace: need N*N+BDSPAC) */
+
+		    sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &
+			    c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork]
+			    , info);
+		    iu = ie + *n;
+
+/*                 Multiply Q in A by left singular vectors of R in */
+/*                 WORK(IR), storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
+
+		    i__2 = *m;
+		    i__3 = ldwrku;
+		    for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__3) {
+/* Computing MIN */
+			i__4 = *m - i__ + 1;
+			chunk = f2cmin(i__4,ldwrku);
+			sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + 
+				a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
+				work[iu], &ldwrku);
+			slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
+				a_dim1], lda);
+/* L10: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    ie = 1;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize A */
+/*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__3, &ierr);
+
+/*                 Generate left vectors bidiagonalizing A */
+/*                 (Workspace: need 4*N, prefer 3*N+N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[iwork], &i__3, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of A in A */
+/*                 (Workspace: need BDSPAC) */
+
+		    sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &
+			    c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
+			     info);
+
+		}
+
+	    } else if (wntuo && wntvas) {
+
+/*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */
+/*              N left singular vectors to be overwritten on A and */
+/*              N right singular vectors to be computed in VT */
+
+/* Computing MAX */
+		i__3 = *n << 2;
+		if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__3 = wrkbl, i__2 = *lda * *n + *n;
+		    if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N */
+
+			ldwrku = *lda;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__3 = wrkbl, i__2 = *lda * *n + *n;
+			if (*lwork >= f2cmax(i__3,i__2) + *n * *n) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ldwrkr = *n;
+			} else {
+
+/*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N */
+
+			    ldwrku = (*lwork - *n * *n - *n) / *n;
+			    ldwrkr = *n;
+			}
+		    }
+		    itau = ir + ldwrkr * *n;
+		    iwork = itau + *n;
+
+/*                 Compute A=Q*R */
+/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__3, &ierr);
+
+/*                 Copy R to VT, zeroing out below it */
+
+		    slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+			    ldvt);
+		    if (*n > 1) {
+			i__3 = *n - 1;
+			i__2 = *n - 1;
+			slaset_("L", &i__3, &i__2, &c_b57, &c_b57, &vt[
+				vt_dim1 + 2], ldvt);
+		    }
+
+/*                 Generate Q in A */
+/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__3, &ierr);
+		    ie = itau;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize R in VT, copying result to WORK(IR) */
+/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
+			    work[itauq], &work[itaup], &work[iwork], &i__3, &
+			    ierr);
+		    slacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], &
+			    ldwrkr);
+
+/*                 Generate left vectors bidiagonalizing R in WORK(IR) */
+/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
+			    work[iwork], &i__3, &ierr);
+
+/*                 Generate right vectors bidiagonalizing R in VT */
+/*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
+			    &work[iwork], &i__3, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of R in WORK(IR) and computing right */
+/*                 singular vectors of R in VT */
+/*                 (Workspace: need N*N+BDSPAC) */
+
+		    sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
+			    vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, 
+			    &work[iwork], info);
+		    iu = ie + *n;
+
+/*                 Multiply Q in A by left singular vectors of R in */
+/*                 WORK(IR), storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
+
+		    i__3 = *m;
+		    i__2 = ldwrku;
+		    for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
+			     i__2) {
+/* Computing MIN */
+			i__4 = *m - i__ + 1;
+			chunk = f2cmin(i__4,ldwrku);
+			sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + 
+				a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
+				work[iu], &ldwrku);
+			slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
+				a_dim1], lda);
+/* L20: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    itau = 1;
+		    iwork = itau + *n;
+
+/*                 Compute A=Q*R */
+/*                 (Workspace: need 2*N, prefer N+N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy R to VT, zeroing out below it */
+
+		    slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+			    ldvt);
+		    if (*n > 1) {
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
+				vt_dim1 + 2], ldvt);
+		    }
+
+/*                 Generate Q in A */
+/*                 (Workspace: need 2*N, prefer N+N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize R in VT */
+/*                 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
+			    work[itauq], &work[itaup], &work[iwork], &i__2, &
+			    ierr);
+
+/*                 Multiply Q in A by left vectors bidiagonalizing R */
+/*                 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &
+			    work[itauq], &a[a_offset], lda, &work[iwork], &
+			    i__2, &ierr);
+
+/*                 Generate right vectors bidiagonalizing R in VT */
+/*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
+			    &work[iwork], &i__2, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of A in A and computing right */
+/*                 singular vectors of A in VT */
+/*                 (Workspace: need BDSPAC) */
+
+		    sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
+			    vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
+			    work[iwork], info);
+
+		}
+
+	    } else if (wntus) {
+
+		if (wntvn) {
+
+/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
+/*                 N left singular vectors to be computed in U and */
+/*                 no right singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IR) is LDA by N */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is N by N */
+
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R */
+/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IR), zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				1], &ldwrkr);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IR) */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate left vectors bidiagonalizing R in WORK(IR) */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IR) */
+/*                    (Workspace: need N*N+BDSPAC) */
+
+			sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply Q in A by left singular vectors of R in */
+/*                    WORK(IR), storing result in U */
+/*                    (Workspace: need N*N) */
+
+			sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
+				work[ir], &ldwrkr, &c_b57, &u[u_offset], ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left vectors bidiagonalizing R */
+/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntvo) {
+
+/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
+/*                 N left singular vectors to be computed in U and */
+/*                 N right singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *n) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			} else {
+
+/*                       WORK(IU) is N by N and WORK(IR) is N by N */
+
+			    ldwrku = *n;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R */
+/*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*N*N+4*N, */
+/*                                prefer 2*N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*N*N+4*N-1, */
+/*                                prefer 2*N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in WORK(IR) */
+/*                    (Workspace: need 2*N*N+BDSPAC) */
+
+			sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
+				ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply Q in A by left singular vectors of R in */
+/*                    WORK(IU), storing result in U */
+/*                    (Workspace: need N*N) */
+
+			sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
+				work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
+
+/*                    Copy right singular vectors of R to A */
+/*                    (Workspace: need N*N) */
+
+			slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left vectors bidiagonalizing R */
+/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+
+/*                    Generate right vectors bidiagonalizing R in A */
+/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in A */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
+				a_offset], lda, &u[u_offset], ldu, dum, &c__1,
+				 &work[iwork], info);
+
+		    }
+
+		} else if (wntvas) {
+
+/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' */
+/*                         or 'A') */
+/*                 N left singular vectors to be computed in U and */
+/*                 N right singular vectors to be computed in VT */
+
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IU) is LDA by N */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is N by N */
+
+			    ldwrku = *n;
+			}
+			itau = iu + ldwrku * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R */
+/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to VT */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
+				 ldvt);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need N*N+4*N-1, */
+/*                                prefer N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in VT */
+/*                    (Workspace: need N*N+BDSPAC) */
+
+			sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &work[iu], &ldwrku, dum, &
+				c__1, &work[iwork], info);
+
+/*                    Multiply Q in A by left singular vectors of R in */
+/*                    WORK(IU), storing result in U */
+/*                    (Workspace: need N*N) */
+
+			sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
+				work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R to VT, zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
+				    vt_dim1 + 2], ldvt);
+			}
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in VT */
+/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
+				&work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in VT */
+/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
+				&work[itauq], &u[u_offset], ldu, &work[iwork],
+				 &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    } else if (wntua) {
+
+		if (wntvn) {
+
+/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
+/*                 M left singular vectors to be computed in U and */
+/*                 no right singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IR) is LDA by N */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is N by N */
+
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Copy R to WORK(IR), zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				1], &ldwrkr);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IR) */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IR) */
+/*                    (Workspace: need N*N+BDSPAC) */
+
+			sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply Q in U by left singular vectors of R in */
+/*                    WORK(IR), storing result in A */
+/*                    (Workspace: need N*N) */
+
+			sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
+				work[ir], &ldwrkr, &c_b57, &a[a_offset], lda);
+
+/*                    Copy left singular vectors of A from A to U */
+
+			slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N+M, prefer N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in A */
+/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntvo) {
+
+/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
+/*                 M left singular vectors to be computed in U and */
+/*                 N right singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *n) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			} else {
+
+/*                       WORK(IU) is N by N and WORK(IR) is N by N */
+
+			    ldwrku = *n;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*N*N+4*N, */
+/*                                prefer 2*N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*N*N+4*N-1, */
+/*                                prefer 2*N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in WORK(IR) */
+/*                    (Workspace: need 2*N*N+BDSPAC) */
+
+			sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
+				ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply Q in U by left singular vectors of R in */
+/*                    WORK(IU), storing result in A */
+/*                    (Workspace: need N*N) */
+
+			sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
+				work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
+
+/*                    Copy left singular vectors of A from A to U */
+
+			slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Copy right singular vectors of R from WORK(IR) to A */
+
+			slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N+M, prefer N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in A */
+/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+
+/*                    Generate right bidiagonalizing vectors in A */
+/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in A */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
+				a_offset], lda, &u[u_offset], ldu, dum, &c__1,
+				 &work[iwork], info);
+
+		    }
+
+		} else if (wntvas) {
+
+/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' */
+/*                         or 'A') */
+/*                 M left singular vectors to be computed in U and */
+/*                 N right singular vectors to be computed in VT */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IU) is LDA by N */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is N by N */
+
+			    ldwrku = *n;
+			}
+			itau = iu + ldwrku * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to VT */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
+				 ldvt);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need N*N+4*N-1, */
+/*                                prefer N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in VT */
+/*                    (Workspace: need N*N+BDSPAC) */
+
+			sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &work[iu], &ldwrku, dum, &
+				c__1, &work[iwork], info);
+
+/*                    Multiply Q in U by left singular vectors of R in */
+/*                    WORK(IU), storing result in A */
+/*                    (Workspace: need N*N) */
+
+			sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
+				work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
+
+/*                    Copy left singular vectors of A from A to U */
+
+			slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N+M, prefer N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R from A to VT, zeroing out below it */
+
+			slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
+				    vt_dim1 + 2], ldvt);
+			}
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in VT */
+/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
+				&work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in VT */
+/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
+				&work[itauq], &u[u_offset], ldu, &work[iwork],
+				 &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    }
+
+	} else {
+
+/*           M .LT. MNTHR */
+
+/*           Path 10 (M at least N, but not much larger) */
+/*           Reduce to bidiagonal form without QR decomposition */
+
+	    ie = 1;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    iwork = itaup + *n;
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[iwork], &i__2, &ierr);
+	    if (wntuas) {
+
+/*              If left singular vectors desired in U, copy result to U */
+/*              and generate left bidiagonalizing vectors in U */
+/*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */
+
+		slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+		if (wntus) {
+		    ncu = *n;
+		}
+		if (wntua) {
+		    ncu = *m;
+		}
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], &
+			work[iwork], &i__2, &ierr);
+	    }
+	    if (wntvas) {
+
+/*              If right singular vectors desired in VT, copy result to */
+/*              VT and generate right bidiagonalizing vectors in VT */
+/*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+		slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[iwork], &i__2, &ierr);
+	    }
+	    if (wntuo) {
+
+/*              If left singular vectors desired in A, generate left */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*N, prefer 3*N+N*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    if (wntvo) {
+
+/*              If right singular vectors desired in A, generate right */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    iwork = ie + *n;
+	    if (wntuas || wntuo) {
+		nru = *m;
+	    }
+	    if (wntun) {
+		nru = 0;
+	    }
+	    if (wntvas || wntvo) {
+		ncvt = *n;
+	    }
+	    if (wntvn) {
+		ncvt = 0;
+	    }
+	    if (! wntuo && ! wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
+			work[iwork], info);
+	    } else if (! wntuo && wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in A */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
+			a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
+			iwork], info);
+	    } else {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in A and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
+			work[iwork], info);
+	    }
+
+	}
+
+    } else {
+
+/*        A has more columns than rows. If A has sufficiently more */
+/*        columns than rows, first reduce using the LQ decomposition (if */
+/*        sufficient workspace available) */
+
+	if (*n >= mnthr) {
+
+	    if (wntvn) {
+
+/*              Path 1t(N much larger than M, JOBVT='N') */
+/*              No right singular vectors to be computed */
+
+		itau = 1;
+		iwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              (Workspace: need 2*M, prefer M+M*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
+			i__2, &ierr);
+
+/*              Zero out above L */
+
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 << 1) + 
+			1], lda);
+		ie = 1;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		iwork = itaup + *m;
+
+/*              Bidiagonalize L in A */
+/*              (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+		if (wntuo || wntuas) {
+
+/*                 If left singular vectors desired, generate Q */
+/*                 (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &
+			    work[iwork], &i__2, &ierr);
+		}
+		iwork = ie + *m;
+		nru = 0;
+		if (wntuo || wntuas) {
+		    nru = *m;
+		}
+
+/*              Perform bidiagonal QR iteration, computing left singular */
+/*              vectors of A in A if desired */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, &
+			c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], 
+			info);
+
+/*              If left singular vectors desired in U, copy them there */
+
+		if (wntuas) {
+		    slacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		}
+
+	    } else if (wntvo && wntun) {
+
+/*              Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
+/*              M right singular vectors to be overwritten on A and */
+/*              no left singular vectors to be computed */
+
+/* Computing MAX */
+		i__2 = *m << 2;
+		if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *lda * *n + *m;
+		    if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
+
+			ldwrku = *lda;
+			chunk = *n;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__2 = wrkbl, i__3 = *lda * *n + *m;
+			if (*lwork >= f2cmax(i__2,i__3) + *m * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    chunk = *n;
+			    ldwrkr = *m;
+			} else {
+
+/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    chunk = (*lwork - *m * *m - *m) / *m;
+			    ldwrkr = *m;
+			}
+		    }
+		    itau = ir + ldwrkr * *m;
+		    iwork = itau + *m;
+
+/*                 Compute A=L*Q */
+/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy L to WORK(IR) and zero out above it */
+
+		    slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr);
+		    i__2 = *m - 1;
+		    i__3 = *m - 1;
+		    slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+			    ldwrkr], &ldwrkr);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize L in WORK(IR) */
+/*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+
+/*                 Generate right vectors bidiagonalizing L */
+/*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
+			    work[iwork], &i__2, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing right */
+/*                 singular vectors of L in WORK(IR) */
+/*                 (Workspace: need M*M+BDSPAC) */
+
+		    sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[
+			    ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork]
+			    , info);
+		    iu = ie + *m;
+
+/*                 Multiply right singular vectors of L in WORK(IR) by Q */
+/*                 in A, storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M) */
+
+		    i__2 = *n;
+		    i__3 = chunk;
+		    for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__3) {
+/* Computing MIN */
+			i__4 = *n - i__ + 1;
+			blk = f2cmin(i__4,chunk);
+			sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
+				ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
+				work[iu], &ldwrku);
+			slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
+				a_dim1 + 1], lda);
+/* L30: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    ie = 1;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize A */
+/*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__3, &ierr);
+
+/*                 Generate right vectors bidiagonalizing A */
+/*                 (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[iwork], &i__3, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing right */
+/*                 singular vectors of A in A */
+/*                 (Workspace: need BDSPAC) */
+
+		    sbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[
+			    a_offset], lda, dum, &c__1, dum, &c__1, &work[
+			    iwork], info);
+
+		}
+
+	    } else if (wntvo && wntuas) {
+
+/*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */
+/*              M right singular vectors to be overwritten on A and */
+/*              M left singular vectors to be computed in U */
+
+/* Computing MAX */
+		i__3 = *m << 2;
+		if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__3 = wrkbl, i__2 = *lda * *n + *m;
+		    if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
+
+			ldwrku = *lda;
+			chunk = *n;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__3 = wrkbl, i__2 = *lda * *n + *m;
+			if (*lwork >= f2cmax(i__3,i__2) + *m * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    chunk = *n;
+			    ldwrkr = *m;
+			} else {
+
+/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    chunk = (*lwork - *m * *m - *m) / *m;
+			    ldwrkr = *m;
+			}
+		    }
+		    itau = ir + ldwrkr * *m;
+		    iwork = itau + *m;
+
+/*                 Compute A=L*Q */
+/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__3, &ierr);
+
+/*                 Copy L to U, zeroing about above it */
+
+		    slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		    i__3 = *m - 1;
+		    i__2 = *m - 1;
+		    slaset_("U", &i__3, &i__2, &c_b57, &c_b57, &u[(u_dim1 << 
+			    1) + 1], ldu);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__3, &ierr);
+		    ie = itau;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize L in U, copying result to WORK(IR) */
+/*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__3, &ierr);
+		    slacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr);
+
+/*                 Generate right vectors bidiagonalizing L in WORK(IR) */
+/*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
+			    work[iwork], &i__3, &ierr);
+
+/*                 Generate left vectors bidiagonalizing L in U */
+/*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
+			    work[iwork], &i__3, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of L in U, and computing right */
+/*                 singular vectors of L in WORK(IR) */
+/*                 (Workspace: need M*M+BDSPAC) */
+
+		    sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], 
+			    &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[
+			    iwork], info);
+		    iu = ie + *m;
+
+/*                 Multiply right singular vectors of L in WORK(IR) by Q */
+/*                 in A, storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)) */
+
+		    i__3 = *n;
+		    i__2 = chunk;
+		    for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
+			     i__2) {
+/* Computing MIN */
+			i__4 = *n - i__ + 1;
+			blk = f2cmin(i__4,chunk);
+			sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
+				ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
+				work[iu], &ldwrku);
+			slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
+				a_dim1 + 1], lda);
+/* L40: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    itau = 1;
+		    iwork = itau + *m;
+
+/*                 Compute A=L*Q */
+/*                 (Workspace: need 2*M, prefer M+M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy L to U, zeroing out above it */
+
+		    slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		    i__2 = *m - 1;
+		    i__3 = *m - 1;
+		    slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 << 
+			    1) + 1], ldu);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need 2*M, prefer M+M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize L in U */
+/*                 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+
+/*                 Multiply right vectors bidiagonalizing L by Q in A */
+/*                 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[
+			    itaup], &a[a_offset], lda, &work[iwork], &i__2, &
+			    ierr);
+
+/*                 Generate left vectors bidiagonalizing L in U */
+/*                 (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
+			    work[iwork], &i__2, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of A in U and computing right */
+/*                 singular vectors of A in A */
+/*                 (Workspace: need BDSPAC) */
+
+		    sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[
+			    a_offset], lda, &u[u_offset], ldu, dum, &c__1, &
+			    work[iwork], info);
+
+		}
+
+	    } else if (wntvs) {
+
+		if (wntun) {
+
+/*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
+/*                 M right singular vectors to be computed in VT and */
+/*                 no left singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IR) is LDA by M */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is M by M */
+
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IR), zeroing out above it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				ldwrkr], &ldwrkr);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IR) */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate right vectors bidiagonalizing L in */
+/*                    WORK(IR) */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of L in WORK(IR) */
+/*                    (Workspace: need M*M+BDSPAC) */
+
+			sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
+				work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IR) by */
+/*                    Q in A, storing result in VT */
+/*                    (Workspace: need M*M) */
+
+			sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, 
+				&a[a_offset], lda, &c_b57, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy result to VT */
+
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right vectors bidiagonalizing L by Q in VT */
+/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
+				vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntuo) {
+
+/*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
+/*                 M right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *m) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			} else {
+
+/*                       WORK(IU) is M by M and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out below it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*M*M+4*M, */
+/*                                prefer 2*M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*M*M+4*M-1, */
+/*                                prefer 2*M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in WORK(IR) and computing */
+/*                    right singular vectors of L in WORK(IU) */
+/*                    (Workspace: need 2*M*M+BDSPAC) */
+
+			sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in A, storing result in VT */
+/*                    (Workspace: need M*M) */
+
+			sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&a[a_offset], lda, &c_b57, &vt[vt_offset], 
+				ldvt);
+
+/*                    Copy left singular vectors of L to A */
+/*                    (Workspace: need M*M) */
+
+			slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right vectors bidiagonalizing L by Q in VT */
+/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors of L in A */
+/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, compute left */
+/*                    singular vectors of A in A and compute right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &a[a_offset], lda, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		} else if (wntuas) {
+
+/*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
+/*                         JOBVT='S') */
+/*                 M right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be computed in U */
+
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IU) is LDA by N */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is LDA by M */
+
+			    ldwrku = *m;
+			}
+			itau = iu + ldwrku * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out above it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to U */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
+				ldu);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need M*M+4*M-1, */
+/*                                prefer M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in U and computing right */
+/*                    singular vectors of L in WORK(IU) */
+/*                    (Workspace: need M*M+BDSPAC) */
+
+			sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in A, storing result in VT */
+/*                    (Workspace: need M*M) */
+
+			sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&a[a_offset], lda, &c_b57, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to U, zeroing out above it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 
+				<< 1) + 1], ldu);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in U */
+/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in U by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    } else if (wntva) {
+
+		if (wntun) {
+
+/*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
+/*                 N right singular vectors to be computed in VT and */
+/*                 no left singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IR) is LDA by M */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is M by M */
+
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Copy L to WORK(IR), zeroing out above it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				ldwrkr], &ldwrkr);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IR) */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need M*M+4*M-1, */
+/*                                prefer M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of L in WORK(IR) */
+/*                    (Workspace: need M*M+BDSPAC) */
+
+			sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
+				work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IR) by */
+/*                    Q in VT, storing result in A */
+/*                    (Workspace: need M*M) */
+
+			sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, 
+				&vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
+				lda);
+
+/*                    Copy right singular vectors of A from A to VT */
+
+			slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M+N, prefer M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in A by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
+				vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntuo) {
+
+/*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
+/*                 N right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *m) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			} else {
+
+/*                       WORK(IU) is M by M and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out above it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*M*M+4*M, */
+/*                                prefer 2*M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*M*M+4*M-1, */
+/*                                prefer 2*M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in WORK(IR) and computing */
+/*                    right singular vectors of L in WORK(IU) */
+/*                    (Workspace: need 2*M*M+BDSPAC) */
+
+			sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in VT, storing result in A */
+/*                    (Workspace: need M*M) */
+
+			sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
+				lda);
+
+/*                    Copy right singular vectors of A from A to VT */
+
+			slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Copy left singular vectors of A from WORK(IR) to A */
+
+			slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M+N, prefer M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in A by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in A */
+/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in A and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &a[a_offset], lda, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		} else if (wntuas) {
+
+/*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
+/*                         JOBVT='A') */
+/*                 N right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be computed in U */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IU) is LDA by M */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is M by M */
+
+			    ldwrku = *m;
+			}
+			itau = iu + ldwrku * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out above it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to U */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
+				ldu);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in U and computing right */
+/*                    singular vectors of L in WORK(IU) */
+/*                    (Workspace: need M*M+BDSPAC) */
+
+			sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in VT, storing result in A */
+/*                    (Workspace: need M*M) */
+
+			sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
+				lda);
+
+/*                    Copy right singular vectors of A from A to VT */
+
+			slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M+N, prefer M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to U, zeroing out above it */
+
+			slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 
+				<< 1) + 1], ldu);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in U */
+/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in U by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    }
+
+	} else {
+
+/*           N .LT. MNTHR */
+
+/*           Path 10t(N greater than M, but not much larger) */
+/*           Reduce to bidiagonal form without LQ decomposition */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    iwork = itaup + *m;
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[iwork], &i__2, &ierr);
+	    if (wntuas) {
+
+/*              If left singular vectors desired in U, copy result to U */
+/*              and generate left bidiagonalizing vectors in U */
+/*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
+
+		slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    if (wntvas) {
+
+/*              If right singular vectors desired in VT, copy result to */
+/*              VT and generate right bidiagonalizing vectors in VT */
+/*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */
+
+		slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		if (wntva) {
+		    nrvt = *n;
+		}
+		if (wntvs) {
+		    nrvt = *m;
+		}
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], 
+			&work[iwork], &i__2, &ierr);
+	    }
+	    if (wntuo) {
+
+/*              If left singular vectors desired in A, generate left */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    if (wntvo) {
+
+/*              If right singular vectors desired in A, generate right */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    iwork = ie + *m;
+	    if (wntuas || wntuo) {
+		nru = *m;
+	    }
+	    if (wntun) {
+		nru = 0;
+	    }
+	    if (wntvas || wntvo) {
+		ncvt = *n;
+	    }
+	    if (wntvn) {
+		ncvt = 0;
+	    }
+	    if (! wntuo && ! wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
+			work[iwork], info);
+	    } else if (! wntuo && wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in A */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
+			a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
+			iwork], info);
+	    } else {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in A and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
+			work[iwork], info);
+	    }
+
+	}
+
+    }
+
+/*     If SBDSQR failed to converge, copy unconverged superdiagonals */
+/*     to WORK( 2:MINMN ) */
+
+    if (*info != 0) {
+	if (ie > 2) {
+	    i__2 = minmn - 1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__ + 1] = work[i__ + ie - 1];
+/* L50: */
+	    }
+	}
+	if (ie < 2) {
+	    for (i__ = minmn - 1; i__ >= 1; --i__) {
+		work[i__ + 1] = work[i__ + ie - 1];
+/* L60: */
+	    }
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (*info != 0 && anrm > bignum) {
+	    i__2 = minmn - 1;
+	    slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2],
+		     &minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (*info != 0 && anrm < smlnum) {
+	    i__2 = minmn - 1;
+	    slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2],
+		     &minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (real) maxwrk;
+
+    return 0;
+
+/*     End of SGESVD */
+
+} /* sgesvd_ */
+
diff --git a/lapack-netlib/SRC/sgesvdq.c b/lapack-netlib/SRC/sgesvdq.c
new file mode 100644
index 000000000..e2de2d961
--- /dev/null
+++ b/lapack-netlib/SRC/sgesvdq.c
@@ -0,0 +1,2124 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static real c_b72 = 0.f;
+static real c_b76 = 1.f;
+static integer c__0 = 0;
+static logical c_false = FALSE_;
+
+/* > \brief <b> SGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method
+ for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESVDQ + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvdq
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvdq
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvdq
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*      SUBROUTINE SGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, */
+/*                          S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK, */
+/*                          WORK, LWORK, RWORK, LRWORK, INFO ) */
+
+/*      IMPLICIT    NONE */
+/*      CHARACTER   JOBA, JOBP, JOBR, JOBU, JOBV */
+/*      INTEGER     M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK, */
+/*                  INFO */
+/*      REAL        A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * ) */
+/*      REAL        S( * ), RWORK( * ) */
+/*      INTEGER     IWORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGESVDQ computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, where M >= N. The SVD of A is written as */
+/* >                                    [++]   [xx]   [x0]   [xx] */
+/* >              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx] */
+/* >                                    [++]   [xx] */
+/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */
+/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */
+/* > of SIGMA are the singular values of A. The columns of U and V are the */
+/* > left and the right singular vectors of A, respectively. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBA */
+/* > \verbatim */
+/* >  JOBA is CHARACTER*1 */
+/* >  Specifies the level of accuracy in the computed SVD */
+/* >  = 'A' The requested accuracy corresponds to having the backward */
+/* >        error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F, */
+/* >        where EPS = SLAMCH('Epsilon'). This authorises CGESVDQ to */
+/* >        truncate the computed triangular factor in a rank revealing */
+/* >        QR factorization whenever the truncated part is below the */
+/* >        threshold of the order of EPS * ||A||_F. This is aggressive */
+/* >        truncation level. */
+/* >  = 'M' Similarly as with 'A', but the truncation is more gentle: it */
+/* >        is allowed only when there is a drop on the diagonal of the */
+/* >        triangular factor in the QR factorization. This is medium */
+/* >        truncation level. */
+/* >  = 'H' High accuracy requested. No numerical rank determination based */
+/* >        on the rank revealing QR factorization is attempted. */
+/* >  = 'E' Same as 'H', and in addition the condition number of column */
+/* >        scaled A is estimated and returned in  RWORK(1). */
+/* >        N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBP */
+/* > \verbatim */
+/* >  JOBP is CHARACTER*1 */
+/* >  = 'P' The rows of A are ordered in decreasing order with respect to */
+/* >        ||A(i,:)||_\infty. This enhances numerical accuracy at the cost */
+/* >        of extra data movement. Recommended for numerical robustness. */
+/* >  = 'N' No row pivoting. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBR */
+/* > \verbatim */
+/* >          JOBR is CHARACTER*1 */
+/* >          = 'T' After the initial pivoted QR factorization, SGESVD is applied to */
+/* >          the transposed R**T of the computed triangular factor R. This involves */
+/* >          some extra data movement (matrix transpositions). Useful for */
+/* >          experiments, research and development. */
+/* >          = 'N' The triangular factor R is given as input to SGESVD. This may be */
+/* >          preferred as it involves less data movement. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          = 'A' All M left singular vectors are computed and returned in the */
+/* >          matrix U. See the description of U. */
+/* >          = 'S' or 'U' N = f2cmin(M,N) left singular vectors are computed and returned */
+/* >          in the matrix U. See the description of U. */
+/* >          = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular */
+/* >          vectors are computed and returned in the matrix U. */
+/* >          = 'F' The N left singular vectors are returned in factored form as the */
+/* >          product of the Q factor from the initial QR factorization and the */
+/* >          N left singular vectors of (R**T , 0)**T. If row pivoting is used, */
+/* >          then the necessary information on the row pivoting is stored in */
+/* >          IWORK(N+1:N+M-1). */
+/* >          = 'N' The left singular vectors are not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          = 'A', 'V' All N right singular vectors are computed and returned in */
+/* >          the matrix V. */
+/* >          = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular */
+/* >          vectors are computed and returned in the matrix V. This option is */
+/* >          allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal. */
+/* >          = 'N' The right singular vectors are not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array of dimensions LDA x N */
+/* >          On entry, the input matrix A. */
+/* >          On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains */
+/* >          the Householder vectors as stored by SGEQP3. If JOBU = 'F', these Householder */
+/* >          vectors together with WORK(1:N) can be used to restore the Q factors from */
+/* >          the initial pivoted QR factorization of A. See the description of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER. */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is REAL array of dimension N. */
+/* >          The singular values of A, ordered so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is REAL array, dimension */
+/* >          LDU x M if JOBU = 'A'; see the description of LDU. In this case, */
+/* >          on exit, U contains the M left singular vectors. */
+/* >          LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this */
+/* >          case, U contains the leading N or the leading NUMRANK left singular vectors. */
+/* >          LDU x N if JOBU = 'F' ; see the description of LDU. In this case U */
+/* >          contains N x N orthogonal matrix that can be used to form the left */
+/* >          singular vectors. */
+/* >          If JOBU = 'N', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER. */
+/* >          The leading dimension of the array U. */
+/* >          If JOBU = 'A', 'S', 'U', 'R',  LDU >= f2cmax(1,M). */
+/* >          If JOBU = 'F',                 LDU >= f2cmax(1,N). */
+/* >          Otherwise,                     LDU >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension */
+/* >          LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' . */
+/* >          If JOBV = 'A', or 'V',  V contains the N-by-N orthogonal matrix  V**T; */
+/* >          If JOBV = 'R', V contains the first NUMRANK rows of V**T (the right */
+/* >          singular vectors, stored rowwise, of the NUMRANK largest singular values). */
+/* >          If JOBV = 'N' and JOBA = 'E', V is used as a workspace. */
+/* >          If JOBV = 'N', and JOBA.NE.'E', V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If JOBV = 'A', 'V', 'R',  or JOBA = 'E', LDV >= f2cmax(1,N). */
+/* >          Otherwise,                               LDV >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] NUMRANK */
+/* > \verbatim */
+/* >          NUMRANK is INTEGER */
+/* >          NUMRANK is the numerical rank first determined after the rank */
+/* >          revealing QR factorization, following the strategy specified by the */
+/* >          value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK */
+/* >          leading singular values and vectors are then requested in the call */
+/* >          of SGESVD. The final value of NUMRANK might be further reduced if */
+/* >          some singular values are computed as zeros. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (f2cmax(1, LIWORK)). */
+/* >          On exit, IWORK(1:N) contains column pivoting permutation of the */
+/* >          rank revealing QR factorization. */
+/* >          If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence */
+/* >          of row swaps used in row pivoting. These can be used to restore the */
+/* >          left singular vectors in the case JOBU = 'F'. */
+/* > */
+/* >          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */
+/* >          LIWORK(1) returns the minimal LIWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LIWORK */
+/* > \verbatim */
+/* >          LIWORK is INTEGER */
+/* >          The dimension of the array IWORK. */
+/* >          LIWORK >= N + M - 1,     if JOBP = 'P' and JOBA .NE. 'E'; */
+/* >          LIWORK >= N              if JOBP = 'N' and JOBA .NE. 'E'; */
+/* >          LIWORK >= N + M - 1 + N, if JOBP = 'P' and JOBA = 'E'; */
+/* >          LIWORK >= N + N          if JOBP = 'N' and JOBA = 'E'. */
+
+/* >          If LIWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates and returns the optimal and minimal sizes */
+/* >          for the WORK, IWORK, and RWORK arrays, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (f2cmax(2, LWORK)), used as a workspace. */
+/* >          On exit, if, on entry, LWORK.NE.-1, WORK(1:N) contains parameters */
+/* >          needed to recover the Q factor from the QR factorization computed by */
+/* >          SGEQP3. */
+/* > */
+/* >          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */
+/* >          WORK(1) returns the optimal LWORK, and */
+/* >          WORK(2) returns the minimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. It is determined as follows: */
+/* >          Let  LWQP3 = 3*N+1,  LWCON = 3*N, and let */
+/* >          LWORQ = { MAX( N, 1 ),  if JOBU = 'R', 'S', or 'U' */
+/* >                  { MAX( M, 1 ),  if JOBU = 'A' */
+/* >          LWSVD = MAX( 5*N, 1 ) */
+/* >          LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 5*(N/2), 1 ), LWORLQ = MAX( N, 1 ), */
+/* >          LWQRF = MAX( N/2, 1 ), LWORQ2 = MAX( N, 1 ) */
+/* >          Then the minimal value of LWORK is: */
+/* >          = MAX( N + LWQP3, LWSVD )        if only the singular values are needed; */
+/* >          = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed, */
+/* >                                   and a scaled condition estimate requested; */
+/* > */
+/* >          = N + MAX( LWQP3, LWSVD, LWORQ ) if the singular values and the left */
+/* >                                   singular vectors are requested; */
+/* >          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the singular values and the left */
+/* >                                   singular vectors are requested, and also */
+/* >                                   a scaled condition estimate requested; */
+/* > */
+/* >          = N + MAX( LWQP3, LWSVD )        if the singular values and the right */
+/* >                                   singular vectors are requested; */
+/* >          = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right */
+/* >                                   singular vectors are requested, and also */
+/* >                                   a scaled condition etimate requested; */
+/* > */
+/* >          = N + MAX( LWQP3, LWSVD, LWORQ ) if the full SVD is requested with JOBV = 'R'; */
+/* >                                   independent of JOBR; */
+/* >          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the full SVD is requested, */
+/* >                                   JOBV = 'R' and, also a scaled condition */
+/* >                                   estimate requested; independent of JOBR; */
+/* >          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ) ) if the */
+/* >                         full SVD is requested with JOBV = 'A' or 'V', and */
+/* >                         JOBR ='N' */
+/* >          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ ) ) */
+/* >                         if the full SVD is requested with JOBV = 'A' or 'V', and */
+/* >                         JOBR ='N', and also a scaled condition number estimate */
+/* >                         requested. */
+/* >          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) if the */
+/* >                         full SVD is requested with JOBV = 'A', 'V', and JOBR ='T' */
+/* >          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) */
+/* >                         if the full SVD is requested with JOBV = 'A' or 'V', and */
+/* >                         JOBR ='T', and also a scaled condition number estimate */
+/* >                         requested. */
+/* >          Finally, LWORK must be at least two: LWORK = MAX( 2, LWORK ). */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates and returns the optimal and minimal sizes */
+/* >          for the WORK, IWORK, and RWORK arrays, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RWORK */
+/* > \verbatim */
+/* >          RWORK is REAL array, dimension (f2cmax(1, LRWORK)). */
+/* >          On exit, */
+/* >          1. If JOBA = 'E', RWORK(1) contains an estimate of the condition */
+/* >          number of column scaled A. If A = C * D where D is diagonal and C */
+/* >          has unit columns in the Euclidean norm, then, assuming full column rank, */
+/* >          N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1). */
+/* >          Otherwise, RWORK(1) = -1. */
+/* >          2. RWORK(2) contains the number of singular values computed as */
+/* >          exact zeros in SGESVD applied to the upper triangular or trapeziodal */
+/* >          R (from the initial QR factorization). In case of early exit (no call to */
+/* >          SGESVD, such as in the case of zero matrix) RWORK(2) = -1. */
+/* > */
+/* >          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */
+/* >          RWORK(1) returns the minimal LRWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LRWORK */
+/* > \verbatim */
+/* >          LRWORK is INTEGER. */
+/* >          The dimension of the array RWORK. */
+/* >          If JOBP ='P', then LRWORK >= MAX(2, M). */
+/* >          Otherwise, LRWORK >= 2 */
+
+/* >          If LRWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates and returns the optimal and minimal sizes */
+/* >          for the WORK, IWORK, and RWORK arrays, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if SBDSQR did not converge, INFO specifies how many superdiagonals */
+/* >          of an intermediate bidiagonal form B (computed in SGESVD) did not */
+/* >          converge to zero. */
+/* > \endverbatim */
+
+/* > \par Further Details: */
+/*  ======================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >   1. The data movement (matrix transpose) is coded using simple nested */
+/* >   DO-loops because BLAS and LAPACK do not provide corresponding subroutines. */
+/* >   Those DO-loops are easily identified in this source code - by the CONTINUE */
+/* >   statements labeled with 11**. In an optimized version of this code, the */
+/* >   nested DO loops should be replaced with calls to an optimized subroutine. */
+/* >   2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause */
+/* >   column norm overflow. This is the minial precaution and it is left to the */
+/* >   SVD routine (CGESVD) to do its own preemptive scaling if potential over- */
+/* >   or underflows are detected. To avoid repeated scanning of the array A, */
+/* >   an optimal implementation would do all necessary scaling before calling */
+/* >   CGESVD and the scaling in CGESVD can be switched off. */
+/* >   3. Other comments related to code optimization are given in comments in the */
+/* >   code, enlosed in [[double brackets]]. */
+/* > \endverbatim */
+
+/* > \par Bugs, examples and comments */
+/*  =========================== */
+
+/* > \verbatim */
+/* >  Please report all bugs and send interesting examples and/or comments to */
+/* >  drmac@math.hr. Thank you. */
+/* > \endverbatim */
+
+/* > \par References */
+/*  =============== */
+
+/* > \verbatim */
+/* >  [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for */
+/* >      Computing the SVD with High Accuracy. ACM Trans. Math. Softw. */
+/* >      44(1): 11:1-11:30 (2017) */
+/* > */
+/* >  SIGMA library, xGESVDQ section updated February 2016. */
+/* >  Developed and coded by Zlatko Drmac, Department of Mathematics */
+/* >  University of Zagreb, Croatia, drmac@math.hr */
+/* > \endverbatim */
+
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* > \verbatim */
+/* > Developed and coded by Zlatko Drmac, Department of Mathematics */
+/* >  University of Zagreb, Croatia, drmac@math.hr */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2018 */
+
+/* > \ingroup realGEsing */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesvdq_(char *joba, char *jobp, char *jobr, char *jobu, 
+	char *jobv, integer *m, integer *n, real *a, integer *lda, real *s, 
+	real *u, integer *ldu, real *v, integer *ldv, integer *numrank, 
+	integer *iwork, integer *liwork, real *work, integer *lwork, real *
+	rwork, integer *lrwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2;
+    real r__1, r__2, r__3;
+
+    /* Local variables */
+    integer lwrk_sormlq__, lwrk_sormqr__, ierr, lwrk_sgesvd2__;
+    real rtmp;
+    integer lwrk_sormqr2__, optratio;
+    logical lsvc0;
+    extern real snrm2_(integer *, real *, integer *);
+    logical accla;
+    integer lwqp3;
+    logical acclh, acclm;
+    integer p, q;
+    logical conda;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    integer iwoff;
+    logical lsvec;
+    real sfmin, epsln;
+    integer lwcon;
+    logical rsvec;
+    integer lwlqf, lwqrf, n1, lwsvd;
+    logical dntwu, dntwv, wntua;
+    integer lworq;
+    logical wntuf, wntva, wntur, wntus, wntvr;
+    extern /* Subroutine */ int sgeqp3_(integer *, integer *, real *, integer 
+	    *, integer *, real *, real *, integer *, integer *);
+    integer lwsvd2, lworq2, nr;
+    real sconda;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgelqf_(
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , integer *), slascl_(char *, integer *, integer *, real *, real *
+	    , integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), sgesvd_(char *, char *, 
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , real *, integer *, real *, integer *, integer *)
+	    , slacpy_(char *, integer *, integer *, real *, integer *, real *,
+	     integer *), slaset_(char *, integer *, integer *, real *,
+	     real *, real *, integer *), slapmt_(logical *, integer *,
+	     integer *, real *, integer *, integer *), spocon_(char *, 
+	    integer *, real *, integer *, real *, real *, real *, integer *, 
+	    integer *);
+    integer minwrk;
+    logical rtrans;
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
+	    *, integer *, integer *, integer *);
+    real rdummy[1];
+    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    logical lquery;
+    integer lwunlq;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    integer optwrk;
+    logical rowprm;
+    real big;
+    integer minwrk2;
+    logical ascaled;
+    integer optwrk2, lwrk_sgeqp3__, iminwrk, rminwrk, lwrk_sgelqf__, 
+	    lwrk_sgeqrf__, lwrk_sgesvd__;
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --iwork;
+    --work;
+    --rwork;
+
+    /* Function Body */
+    wntus = lsame_(jobu, "S") || lsame_(jobu, "U");
+    wntur = lsame_(jobu, "R");
+    wntua = lsame_(jobu, "A");
+    wntuf = lsame_(jobu, "F");
+    lsvc0 = wntus || wntur || wntua;
+    lsvec = lsvc0 || wntuf;
+    dntwu = lsame_(jobu, "N");
+
+    wntvr = lsame_(jobv, "R");
+    wntva = lsame_(jobv, "A") || lsame_(jobv, "V");
+    rsvec = wntvr || wntva;
+    dntwv = lsame_(jobv, "N");
+
+    accla = lsame_(joba, "A");
+    acclm = lsame_(joba, "M");
+    conda = lsame_(joba, "E");
+    acclh = lsame_(joba, "H") || conda;
+
+    rowprm = lsame_(jobp, "P");
+    rtrans = lsame_(jobr, "T");
+
+    if (rowprm) {
+	if (conda) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n + *m - 1 + *n;
+	    iminwrk = f2cmax(i__1,i__2);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n + *m - 1;
+	    iminwrk = f2cmax(i__1,i__2);
+	}
+	rminwrk = f2cmax(2,*m);
+    } else {
+	if (conda) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n + *n;
+	    iminwrk = f2cmax(i__1,i__2);
+	} else {
+	    iminwrk = f2cmax(1,*n);
+	}
+	rminwrk = 2;
+    }
+    lquery = *liwork == -1 || *lwork == -1 || *lrwork == -1;
+    *info = 0;
+    if (! (accla || acclm || acclh)) {
+	*info = -1;
+    } else if (! (rowprm || lsame_(jobp, "N"))) {
+	*info = -2;
+    } else if (! (rtrans || lsame_(jobr, "N"))) {
+	*info = -3;
+    } else if (! (lsvec || dntwu)) {
+	*info = -4;
+    } else if (wntur && wntva) {
+	*info = -5;
+    } else if (! (rsvec || dntwv)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (*n < 0 || *n > *m) {
+	*info = -7;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -9;
+    } else if (*ldu < 1 || lsvc0 && *ldu < *m || wntuf && *ldu < *n) {
+	*info = -12;
+    } else if (*ldv < 1 || rsvec && *ldv < *n || conda && *ldv < *n) {
+	*info = -14;
+    } else if (*liwork < iminwrk && ! lquery) {
+	*info = -17;
+    }
+
+
+    if (*info == 0) {
+/*        [[The expressions for computing the minimal and the optimal */
+/*        values of LWORK are written with a lot of redundancy and */
+/*        can be simplified. However, this detailed form is easier for */
+/*        maintenance and modifications of the code.]] */
+
+	lwqp3 = *n * 3 + 1;
+	if (wntus || wntur) {
+	    lworq = f2cmax(*n,1);
+	} else if (wntua) {
+	    lworq = f2cmax(*m,1);
+	}
+	lwcon = *n * 3;
+/* Computing MAX */
+	i__1 = *n * 5;
+	lwsvd = f2cmax(i__1,1);
+	if (lquery) {
+	    sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], rdummy, rdummy, &c_n1,
+		     &ierr);
+	    lwrk_sgeqp3__ = (integer) rdummy[0];
+	    if (wntus || wntur) {
+		sormqr_("L", "N", m, n, n, &a[a_offset], lda, rdummy, &u[
+			u_offset], ldu, rdummy, &c_n1, &ierr);
+		lwrk_sormqr__ = (integer) rdummy[0];
+	    } else if (wntua) {
+		sormqr_("L", "N", m, m, n, &a[a_offset], lda, rdummy, &u[
+			u_offset], ldu, rdummy, &c_n1, &ierr);
+		lwrk_sormqr__ = (integer) rdummy[0];
+	    } else {
+		lwrk_sormqr__ = 0;
+	    }
+	}
+	minwrk = 2;
+	optwrk = 2;
+	if (! (lsvec || rsvec)) {
+/*            only the singular values are requested */
+	    if (conda) {
+/* Computing MAX */
+		i__1 = *n + lwqp3, i__1 = f2cmax(i__1,lwcon);
+		minwrk = f2cmax(i__1,lwsvd);
+	    } else {
+/* Computing MAX */
+		i__1 = *n + lwqp3;
+		minwrk = f2cmax(i__1,lwsvd);
+	    }
+	    if (lquery) {
+		sgesvd_("N", "N", n, n, &a[a_offset], lda, &s[1], &u[u_offset]
+			, ldu, &v[v_offset], ldv, rdummy, &c_n1, &ierr);
+		lwrk_sgesvd__ = (integer) rdummy[0];
+		if (conda) {
+/* Computing MAX */
+		    i__1 = *n + lwrk_sgeqp3__, i__2 = *n + lwcon, i__1 = f2cmax(
+			    i__1,i__2);
+		    optwrk = f2cmax(i__1,lwrk_sgesvd__);
+		} else {
+/* Computing MAX */
+		    i__1 = *n + lwrk_sgeqp3__;
+		    optwrk = f2cmax(i__1,lwrk_sgesvd__);
+		}
+	    }
+	} else if (lsvec && ! rsvec) {
+/*            singular values and the left singular vectors are requested */
+	    if (conda) {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwcon), i__1 = f2cmax(i__1,lwsvd);
+		minwrk = *n + f2cmax(i__1,lworq);
+	    } else {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwsvd);
+		minwrk = *n + f2cmax(i__1,lworq);
+	    }
+	    if (lquery) {
+		if (rtrans) {
+		    sgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		} else {
+		    sgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		}
+		lwrk_sgesvd__ = (integer) rdummy[0];
+		if (conda) {
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_sgeqp3__,lwcon), i__1 = f2cmax(i__1,
+			    lwrk_sgesvd__);
+		    optwrk = *n + f2cmax(i__1,lwrk_sormqr__);
+		} else {
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__);
+		    optwrk = *n + f2cmax(i__1,lwrk_sormqr__);
+		}
+	    }
+	} else if (rsvec && ! lsvec) {
+/*            singular values and the right singular vectors are requested */
+	    if (conda) {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwcon);
+		minwrk = *n + f2cmax(i__1,lwsvd);
+	    } else {
+		minwrk = *n + f2cmax(lwqp3,lwsvd);
+	    }
+	    if (lquery) {
+		if (rtrans) {
+		    sgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		} else {
+		    sgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		}
+		lwrk_sgesvd__ = (integer) rdummy[0];
+		if (conda) {
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_sgeqp3__,lwcon);
+		    optwrk = *n + f2cmax(i__1,lwrk_sgesvd__);
+		} else {
+		    optwrk = *n + f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__);
+		}
+	    }
+	} else {
+/*            full SVD is requested */
+	    if (rtrans) {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwsvd);
+		minwrk = f2cmax(i__1,lworq);
+		if (conda) {
+		    minwrk = f2cmax(minwrk,lwcon);
+		}
+		minwrk += *n;
+		if (wntva) {
+/* Computing MAX */
+		    i__1 = *n / 2;
+		    lwqrf = f2cmax(i__1,1);
+/* Computing MAX */
+		    i__1 = *n / 2 * 5;
+		    lwsvd2 = f2cmax(i__1,1);
+		    lworq2 = f2cmax(*n,1);
+/* Computing MAX */
+		    i__1 = lwqp3, i__2 = *n / 2 + lwqrf, i__1 = f2cmax(i__1,i__2)
+			    , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), 
+			    i__2 = *n / 2 + lworq2, i__1 = f2cmax(i__1,i__2);
+		    minwrk2 = f2cmax(i__1,lworq);
+		    if (conda) {
+			minwrk2 = f2cmax(minwrk2,lwcon);
+		    }
+		    minwrk2 = *n + minwrk2;
+		    minwrk = f2cmax(minwrk,minwrk2);
+		}
+	    } else {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwsvd);
+		minwrk = f2cmax(i__1,lworq);
+		if (conda) {
+		    minwrk = f2cmax(minwrk,lwcon);
+		}
+		minwrk += *n;
+		if (wntva) {
+/* Computing MAX */
+		    i__1 = *n / 2;
+		    lwlqf = f2cmax(i__1,1);
+/* Computing MAX */
+		    i__1 = *n / 2 * 5;
+		    lwsvd2 = f2cmax(i__1,1);
+		    lwunlq = f2cmax(*n,1);
+/* Computing MAX */
+		    i__1 = lwqp3, i__2 = *n / 2 + lwlqf, i__1 = f2cmax(i__1,i__2)
+			    , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), 
+			    i__2 = *n / 2 + lwunlq, i__1 = f2cmax(i__1,i__2);
+		    minwrk2 = f2cmax(i__1,lworq);
+		    if (conda) {
+			minwrk2 = f2cmax(minwrk2,lwcon);
+		    }
+		    minwrk2 = *n + minwrk2;
+		    minwrk = f2cmax(minwrk,minwrk2);
+		}
+	    }
+	    if (lquery) {
+		if (rtrans) {
+		    sgesvd_("O", "A", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		    lwrk_sgesvd__ = (integer) rdummy[0];
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__);
+		    optwrk = f2cmax(i__1,lwrk_sormqr__);
+		    if (conda) {
+			optwrk = f2cmax(optwrk,lwcon);
+		    }
+		    optwrk = *n + optwrk;
+		    if (wntva) {
+			i__1 = *n / 2;
+			sgeqrf_(n, &i__1, &u[u_offset], ldu, rdummy, rdummy, &
+				c_n1, &ierr);
+			lwrk_sgeqrf__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			i__2 = *n / 2;
+			sgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[
+				1], &u[u_offset], ldu, &v[v_offset], ldv, 
+				rdummy, &c_n1, &ierr);
+			lwrk_sgesvd2__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			sormqr_("R", "C", n, n, &i__1, &u[u_offset], ldu, 
+				rdummy, &v[v_offset], ldv, rdummy, &c_n1, &
+				ierr);
+			lwrk_sormqr2__ = (integer) rdummy[0];
+/* Computing MAX */
+			i__1 = lwrk_sgeqp3__, i__2 = *n / 2 + lwrk_sgeqrf__, 
+				i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + 
+				lwrk_sgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = 
+				*n / 2 + lwrk_sormqr2__;
+			optwrk2 = f2cmax(i__1,i__2);
+			if (conda) {
+			    optwrk2 = f2cmax(optwrk2,lwcon);
+			}
+			optwrk2 = *n + optwrk2;
+			optwrk = f2cmax(optwrk,optwrk2);
+		    }
+		} else {
+		    sgesvd_("S", "O", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		    lwrk_sgesvd__ = (integer) rdummy[0];
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__);
+		    optwrk = f2cmax(i__1,lwrk_sormqr__);
+		    if (conda) {
+			optwrk = f2cmax(optwrk,lwcon);
+		    }
+		    optwrk = *n + optwrk;
+		    if (wntva) {
+			i__1 = *n / 2;
+			sgelqf_(&i__1, n, &u[u_offset], ldu, rdummy, rdummy, &
+				c_n1, &ierr);
+			lwrk_sgelqf__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			i__2 = *n / 2;
+			sgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[
+				1], &u[u_offset], ldu, &v[v_offset], ldv, 
+				rdummy, &c_n1, &ierr);
+			lwrk_sgesvd2__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			sormlq_("R", "N", n, n, &i__1, &u[u_offset], ldu, 
+				rdummy, &v[v_offset], ldv, rdummy, &c_n1, &
+				ierr);
+			lwrk_sormlq__ = (integer) rdummy[0];
+/* Computing MAX */
+			i__1 = lwrk_sgeqp3__, i__2 = *n / 2 + lwrk_sgelqf__, 
+				i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + 
+				lwrk_sgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = 
+				*n / 2 + lwrk_sormlq__;
+			optwrk2 = f2cmax(i__1,i__2);
+			if (conda) {
+			    optwrk2 = f2cmax(optwrk2,lwcon);
+			}
+			optwrk2 = *n + optwrk2;
+			optwrk = f2cmax(optwrk,optwrk2);
+		    }
+		}
+	    }
+	}
+
+	minwrk = f2cmax(2,minwrk);
+	optwrk = f2cmax(2,optwrk);
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -19;
+	}
+
+    }
+
+    if (*info == 0 && *lrwork < rminwrk && ! lquery) {
+	*info = -21;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESVDQ", &i__1, (ftnlen)7);
+	return 0;
+    } else if (lquery) {
+
+/*     Return optimal workspace */
+
+	iwork[1] = iminwrk;
+	work[1] = (real) optwrk;
+	work[2] = (real) minwrk;
+	rwork[1] = (real) rminwrk;
+	return 0;
+    }
+
+/*     Quick return if the matrix is void. */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+    big = slamch_("O");
+    ascaled = FALSE_;
+    iwoff = 1;
+    if (rowprm) {
+	iwoff = *m;
+/*           ell-infinity norm - this enhances numerical robustness in */
+/*           the case of differently scaled rows. */
+	i__1 = *m;
+	for (p = 1; p <= i__1; ++p) {
+/*               RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) ) */
+/*               [[SLANGE will return NaN if an entry of the p-th row is Nan]] */
+	    rwork[p] = slange_("M", &c__1, n, &a[p + a_dim1], lda, rdummy);
+	    if (rwork[p] != rwork[p] || rwork[p] * 0.f != 0.f) {
+		*info = -8;
+		i__2 = -(*info);
+		xerbla_("SGESVDQ", &i__2, (ftnlen)7);
+		return 0;
+	    }
+/* L1904: */
+	}
+	i__1 = *m - 1;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *m - p + 1;
+	    q = isamax_(&i__2, &rwork[p], &c__1) + p - 1;
+	    iwork[*n + p] = q;
+	    if (p != q) {
+		rtmp = rwork[p];
+		rwork[p] = rwork[q];
+		rwork[q] = rtmp;
+	    }
+/* L1952: */
+	}
+
+	if (rwork[1] == 0.f) {
+/*              Quick return: A is the M x N zero matrix. */
+	    *numrank = 0;
+	    slaset_("G", n, &c__1, &c_b72, &c_b72, &s[1], n);
+	    if (wntus) {
+		slaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu);
+	    }
+	    if (wntua) {
+		slaset_("G", m, m, &c_b72, &c_b76, &u[u_offset], ldu);
+	    }
+	    if (wntva) {
+		slaset_("G", n, n, &c_b72, &c_b76, &v[v_offset], ldv);
+	    }
+	    if (wntuf) {
+		slaset_("G", n, &c__1, &c_b72, &c_b72, &work[1], n)
+			;
+		slaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu);
+	    }
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		iwork[p] = p;
+/* L5001: */
+	    }
+	    if (rowprm) {
+		i__1 = *n + *m - 1;
+		for (p = *n + 1; p <= i__1; ++p) {
+		    iwork[p] = p - *n;
+/* L5002: */
+		}
+	    }
+	    if (conda) {
+		rwork[1] = -1.f;
+	    }
+	    rwork[2] = -1.f;
+	    return 0;
+	}
+
+	if (rwork[1] > big / sqrt((real) (*m))) {
+/*               matrix by 1/sqrt(M) if too large entry detected */
+	    r__1 = sqrt((real) (*m));
+	    slascl_("G", &c__0, &c__0, &r__1, &c_b76, m, n, &a[a_offset], lda,
+		     &ierr);
+	    ascaled = TRUE_;
+	}
+	i__1 = *m - 1;
+	slaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[*n + 1], &c__1);
+    }
+
+/*    norms overflows during the QR factorization. The SVD procedure should */
+/*    have its own scaling to save the singular values from overflows and */
+/*    underflows. That depends on the SVD procedure. */
+
+    if (! rowprm) {
+	rtmp = slange_("M", m, n, &a[a_offset], lda, rdummy);
+	if (rtmp != rtmp || rtmp * 0.f != 0.f) {
+	    *info = -8;
+	    i__1 = -(*info);
+	    xerbla_("SGESVDQ", &i__1, (ftnlen)7);
+	    return 0;
+	}
+	if (rtmp > big / sqrt((real) (*m))) {
+/*             matrix by 1/sqrt(M) if too large entry detected */
+	    r__1 = sqrt((real) (*m));
+	    slascl_("G", &c__0, &c__0, &r__1, &c_b76, m, n, &a[a_offset], lda,
+		     &ierr);
+	    ascaled = TRUE_;
+	}
+    }
+
+
+/*     A * P = Q * [ R ] */
+/*                 [ 0 ] */
+
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	iwork[p] = 0;
+/* L1963: */
+    }
+    i__1 = *lwork - *n;
+    sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], &
+	    i__1, &ierr);
+
+/*    If the user requested accuracy level allows truncation in the */
+/*    computed upper triangular factor, the matrix R is examined and, */
+/*    if possible, replaced with its leading upper trapezoidal part. */
+
+    epsln = slamch_("E");
+    sfmin = slamch_("S");
+/*     SMALL = SFMIN / EPSLN */
+    nr = *n;
+
+    if (accla) {
+
+/*        Standard absolute error bound suffices. All sigma_i with */
+/*        sigma_i < N*EPS*||A||_F are flushed to zero. This is an */
+/*        aggressive enforcement of lower numerical rank by introducing a */
+/*        backward error of the order of N*EPS*||A||_F. */
+	nr = 1;
+	rtmp = sqrt((real) (*n)) * epsln;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((r__2 = a[p + p * a_dim1], abs(r__2)) < rtmp * (r__1 = a[
+		    a_dim1 + 1], abs(r__1))) {
+		goto L3002;
+	    }
+	    ++nr;
+/* L3001: */
+	}
+L3002:
+
+	;
+    } else if (acclm) {
+/*        Sudden drop on the diagonal of R is used as the criterion for being */
+/*        close-to-rank-deficient. The threshold is set to EPSLN=SLAMCH('E'). */
+/*        [[This can be made more flexible by replacing this hard-coded value */
+/*        with a user specified threshold.]] Also, the values that underflow */
+/*        will be truncated. */
+	nr = 1;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((r__2 = a[p + p * a_dim1], abs(r__2)) < epsln * (r__1 = a[p - 
+		    1 + (p - 1) * a_dim1], abs(r__1)) || (r__3 = a[p + p * 
+		    a_dim1], abs(r__3)) < sfmin) {
+		goto L3402;
+	    }
+	    ++nr;
+/* L3401: */
+	}
+L3402:
+
+	;
+    } else {
+/*        obvious case of zero pivots. */
+/*        R(i,i)=0 => R(i:N,i:N)=0. */
+	nr = 1;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((r__1 = a[p + p * a_dim1], abs(r__1)) == 0.f) {
+		goto L3502;
+	    }
+	    ++nr;
+/* L3501: */
+	}
+L3502:
+
+	if (conda) {
+/*           Estimate the scaled condition number of A. Use the fact that it is */
+/*           the same as the scaled condition number of R. */
+	    slacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv);
+/*              Only the leading NR x NR submatrix of the triangular factor */
+/*              is considered. Only if NR=N will this give a reliable error */
+/*              bound. However, even for NR < N, this can be used on an */
+/*              expert level and obtain useful information in the sense of */
+/*              perturbation theory. */
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		rtmp = snrm2_(&p, &v[p * v_dim1 + 1], &c__1);
+		r__1 = 1.f / rtmp;
+		sscal_(&p, &r__1, &v[p * v_dim1 + 1], &c__1);
+/* L3053: */
+	    }
+	    if (! (lsvec || rsvec)) {
+		spocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[1], 
+			&iwork[*n + iwoff], &ierr);
+	    } else {
+		spocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[*n 
+			+ 1], &iwork[*n + iwoff], &ierr);
+	    }
+	    sconda = 1.f / sqrt(rtmp);
+/*           For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1), */
+/*           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */
+/*           See the reference [1] for more details. */
+	}
+
+    }
+
+    if (wntur) {
+	n1 = nr;
+    } else if (wntus || wntuf) {
+	n1 = *n;
+    } else if (wntua) {
+	n1 = *m;
+    }
+
+    if (! (rsvec || lsvec)) {
+/* ....................................................................... */
+/* ....................................................................... */
+	if (rtrans) {
+
+/*           the upper triangle of [A] to zero. */
+	    i__1 = f2cmin(*n,nr);
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p + 1; q <= i__2; ++q) {
+		    a[q + p * a_dim1] = a[p + q * a_dim1];
+		    if (q <= nr) {
+			a[p + q * a_dim1] = 0.f;
+		    }
+/* L1147: */
+		}
+/* L1146: */
+	    }
+
+	    sgesvd_("N", "N", n, &nr, &a[a_offset], lda, &s[1], &u[u_offset], 
+		    ldu, &v[v_offset], ldv, &work[1], lwork, info);
+
+	} else {
+
+
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &a[a_dim1 + 2], 
+			lda);
+	    }
+	    sgesvd_("N", "N", &nr, n, &a[a_offset], lda, &s[1], &u[u_offset], 
+		    ldu, &v[v_offset], ldv, &work[1], lwork, info);
+
+	}
+
+    } else if (lsvec && ! rsvec) {
+/* ....................................................................... */
+/* ......................................................................."""""""" */
+	if (rtrans) {
+/*            vectors of R */
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p; q <= i__2; ++q) {
+		    u[q + p * u_dim1] = a[p + q * a_dim1];
+/* L1193: */
+		}
+/* L1192: */
+	    }
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(u_dim1 << 1) + 
+			1], ldu);
+	    }
+/*           vectors overwrite [U](1:NR,1:NR) as transposed. These */
+/*           will be pre-multiplied by Q to build the left singular vectors of A. */
+	    i__1 = *lwork - *n;
+	    sgesvd_("N", "O", n, &nr, &u[u_offset], ldu, &s[1], &u[u_offset], 
+		    ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info);
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr;
+		for (q = p + 1; q <= i__2; ++q) {
+		    rtmp = u[q + p * u_dim1];
+		    u[q + p * u_dim1] = u[p + q * u_dim1];
+		    u[p + q * u_dim1] = rtmp;
+/* L1120: */
+		}
+/* L1119: */
+	    }
+
+	} else {
+	    slacpy_("U", &nr, n, &a[a_offset], lda, &u[u_offset], ldu);
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[u_dim1 + 2], 
+			ldu);
+	    }
+/*            vectors overwrite [U](1:NR,1:NR) */
+	    i__1 = *lwork - *n;
+	    sgesvd_("O", "N", &nr, n, &u[u_offset], ldu, &s[1], &u[u_offset], 
+		    ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info);
+/*               R. These will be pre-multiplied by Q to build the left singular */
+/*               vectors of A. */
+	}
+
+/*              (M x NR) or (M x N) or (M x M). */
+	if (nr < *m && ! wntuf) {
+	    i__1 = *m - nr;
+	    slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + u_dim1], ldu);
+	    if (nr < n1) {
+		i__1 = n1 - nr;
+		slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * u_dim1 
+			+ 1], ldu);
+		i__1 = *m - nr;
+		i__2 = n1 - nr;
+		slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + (nr + 
+			1) * u_dim1], ldu);
+	    }
+	}
+
+/*           The Q matrix from the first QRF is built into the left singular */
+/*           vectors matrix U. */
+
+	if (! wntuf) {
+	    i__1 = *lwork - *n;
+	    sormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[
+		    u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+	}
+	if (rowprm && ! wntuf) {
+	    i__1 = *m - 1;
+	    slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], &
+		    c_n1);
+	}
+
+    } else if (rsvec && ! lsvec) {
+/* ....................................................................... */
+/* ....................................................................... */
+	if (rtrans) {
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p; q <= i__2; ++q) {
+		    v[q + p * v_dim1] = a[p + q * a_dim1];
+/* L1166: */
+		}
+/* L1165: */
+	    }
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 1) + 
+			1], ldv);
+	    }
+/*           vectors not computed */
+	    if (wntvr || nr == *n) {
+		i__1 = *lwork - *n;
+		sgesvd_("O", "N", n, &nr, &v[v_offset], ldv, &s[1], &u[
+			u_offset], ldu, &u[u_offset], ldu, &work[*n + 1], &
+			i__1, info);
+
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = v[q + p * v_dim1];
+			v[q + p * v_dim1] = v[p + q * v_dim1];
+			v[p + q * v_dim1] = rtmp;
+/* L1122: */
+		    }
+/* L1121: */
+		}
+
+		if (nr < *n) {
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = nr + 1; q <= i__2; ++q) {
+			    v[p + q * v_dim1] = v[q + p * v_dim1];
+/* L1104: */
+			}
+/* L1103: */
+		    }
+		}
+		slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+	    } else {
+/*               [!] This is simple implementation that augments [V](1:N,1:NR) */
+/*               by padding a zero block. In the case NR << N, a more efficient */
+/*               way is to first use the QR factorization. For more details */
+/*               how to implement this, see the " FULL SVD " branch. */
+		i__1 = *n - nr;
+		slaset_("G", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * v_dim1 + 
+			1], ldv);
+		i__1 = *lwork - *n;
+		sgesvd_("O", "N", n, n, &v[v_offset], ldv, &s[1], &u[u_offset]
+			, ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info);
+
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = *n;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = v[q + p * v_dim1];
+			v[q + p * v_dim1] = v[p + q * v_dim1];
+			v[p + q * v_dim1] = rtmp;
+/* L1124: */
+		    }
+/* L1123: */
+		}
+		slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+	    }
+
+	} else {
+	    slacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv);
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2], 
+			ldv);
+	    }
+/*            vectors stored in U(1:NR,1:NR) */
+	    if (wntvr || nr == *n) {
+		i__1 = *lwork - *n;
+		sgesvd_("N", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[
+			u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], &
+			i__1, info);
+		slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+	    } else {
+/*               [!] This is simple implementation that augments [V](1:NR,1:N) */
+/*               by padding a zero block. In the case NR << N, a more efficient */
+/*               way is to first use the LQ factorization. For more details */
+/*               how to implement this, see the " FULL SVD " branch. */
+		i__1 = *n - nr;
+		slaset_("G", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1], 
+			ldv);
+		i__1 = *lwork - *n;
+		sgesvd_("N", "O", n, n, &v[v_offset], ldv, &s[1], &u[u_offset]
+			, ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info);
+		slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+	    }
+/*            vectors of A. */
+	}
+
+    } else {
+/* ....................................................................... */
+/* ....................................................................... */
+	if (rtrans) {
+
+
+	    if (wntvr || nr == *n) {
+/*            vectors of R**T */
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = *n;
+		    for (q = p; q <= i__2; ++q) {
+			v[q + p * v_dim1] = a[p + q * a_dim1];
+/* L1169: */
+		    }
+/* L1168: */
+		}
+		if (nr > 1) {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		}
+
+/*           singular vectors of R**T stored in [U](1:NR,1:NR) as transposed */
+		i__1 = *lwork - *n;
+		sgesvd_("O", "A", n, &nr, &v[v_offset], ldv, &s[1], &v[
+			v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], &
+			i__1, info);
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = v[q + p * v_dim1];
+			v[q + p * v_dim1] = v[p + q * v_dim1];
+			v[p + q * v_dim1] = rtmp;
+/* L1116: */
+		    }
+/* L1115: */
+		}
+		if (nr < *n) {
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = nr + 1; q <= i__2; ++q) {
+			    v[p + q * v_dim1] = v[q + p * v_dim1];
+/* L1102: */
+			}
+/* L1101: */
+		    }
+		}
+		slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = u[q + p * u_dim1];
+			u[q + p * u_dim1] = u[p + q * u_dim1];
+			u[p + q * u_dim1] = rtmp;
+/* L1118: */
+		    }
+/* L1117: */
+		}
+
+		if (nr < *m && ! wntuf) {
+		    i__1 = *m - nr;
+		    slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr < n1) {
+			i__1 = n1 - nr;
+			slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) *
+				 u_dim1 + 1], ldu);
+			i__1 = *m - nr;
+			i__2 = n1 - nr;
+			slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 
+				+ (nr + 1) * u_dim1], ldu);
+		    }
+		}
+
+	    } else {
+/*            vectors of R**T */
+/*               [[The optimal ratio N/NR for using QRF instead of padding */
+/*                 with zeros. Here hard coded to 2; it must be at least */
+/*                 two due to work space constraints.]] */
+/*               OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0) */
+/*               OPTRATIO = MAX( OPTRATIO, 2 ) */
+		optratio = 2;
+		if (optratio * nr > *n) {
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p; q <= i__2; ++q) {
+			    v[q + p * v_dim1] = a[p + q * a_dim1];
+/* L1199: */
+			}
+/* L1198: */
+		    }
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 
+				<< 1) + 1], ldv);
+		    }
+
+		    i__1 = *n - nr;
+		    slaset_("A", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * 
+			    v_dim1 + 1], ldv);
+		    i__1 = *lwork - *n;
+		    sgesvd_("O", "A", n, n, &v[v_offset], ldv, &s[1], &v[
+			    v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], 
+			    &i__1, info);
+
+		    i__1 = *n;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p + 1; q <= i__2; ++q) {
+			    rtmp = v[q + p * v_dim1];
+			    v[q + p * v_dim1] = v[p + q * v_dim1];
+			    v[p + q * v_dim1] = rtmp;
+/* L1114: */
+			}
+/* L1113: */
+		    }
+		    slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*              (M x N1), i.e. (M x N) or (M x M). */
+
+		    i__1 = *n;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p + 1; q <= i__2; ++q) {
+			    rtmp = u[q + p * u_dim1];
+			    u[q + p * u_dim1] = u[p + q * u_dim1];
+			    u[p + q * u_dim1] = rtmp;
+/* L1112: */
+			}
+/* L1111: */
+		    }
+
+		    if (*n < *m && ! wntuf) {
+			i__1 = *m - *n;
+			slaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + 
+				u_dim1], ldu);
+			if (*n < n1) {
+			    i__1 = n1 - *n;
+			    slaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1)
+				     * u_dim1 + 1], ldu);
+			    i__1 = *m - *n;
+			    i__2 = n1 - *n;
+			    slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n 
+				    + 1 + (*n + 1) * u_dim1], ldu);
+			}
+		    }
+		} else {
+/*                  singular vectors of R */
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p; q <= i__2; ++q) {
+			    u[q + (nr + p) * u_dim1] = a[p + q * a_dim1];
+/* L1197: */
+			}
+/* L1196: */
+		    }
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(nr + 2)
+				 * u_dim1 + 1], ldu);
+		    }
+		    i__1 = *lwork - *n - nr;
+		    sgeqrf_(n, &nr, &u[(nr + 1) * u_dim1 + 1], ldu, &work[*n 
+			    + 1], &work[*n + nr + 1], &i__1, &ierr);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = 1; q <= i__2; ++q) {
+			    v[q + p * v_dim1] = u[p + (nr + q) * u_dim1];
+/* L1144: */
+			}
+/* L1143: */
+		    }
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		    i__1 = *lwork - *n - nr;
+		    sgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, &work[*n + nr 
+			    + 1], &i__1, info);
+		    i__1 = *n - nr;
+		    slaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + 
+			    v_dim1], ldv);
+		    i__1 = *n - nr;
+		    slaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * 
+			    v_dim1 + 1], ldv);
+		    i__1 = *n - nr;
+		    i__2 = *n - nr;
+		    slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + (
+			    nr + 1) * v_dim1], ldv);
+		    i__1 = *lwork - *n - nr;
+		    sormqr_("R", "C", n, n, &nr, &u[(nr + 1) * u_dim1 + 1], 
+			    ldu, &work[*n + 1], &v[v_offset], ldv, &work[*n + 
+			    nr + 1], &i__1, &ierr);
+		    slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*                 (M x NR) or (M x N) or (M x M). */
+		    if (nr < *m && ! wntuf) {
+			i__1 = *m - nr;
+			slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+				u_dim1], ldu);
+			if (nr < n1) {
+			    i__1 = n1 - nr;
+			    slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 
+				    1) * u_dim1 + 1], ldu);
+			    i__1 = *m - nr;
+			    i__2 = n1 - nr;
+			    slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr 
+				    + 1 + (nr + 1) * u_dim1], ldu);
+			}
+		    }
+		}
+	    }
+
+	} else {
+
+
+	    if (wntvr || nr == *n) {
+		slacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv);
+		if (nr > 1) {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2],
+			     ldv);
+		}
+/*               singular vectors of R stored in [U](1:NR,1:NR) */
+		i__1 = *lwork - *n;
+		sgesvd_("S", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[
+			u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], &
+			i__1, info);
+		slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+/*              (M x NR) or (M x N) or (M x M). */
+		if (nr < *m && ! wntuf) {
+		    i__1 = *m - nr;
+		    slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr < n1) {
+			i__1 = n1 - nr;
+			slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) *
+				 u_dim1 + 1], ldu);
+			i__1 = *m - nr;
+			i__2 = n1 - nr;
+			slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 
+				+ (nr + 1) * u_dim1], ldu);
+		    }
+		}
+
+	    } else {
+/*               is then N1 (N or M) */
+/*               [[The optimal ratio N/NR for using LQ instead of padding */
+/*                 with zeros. Here hard coded to 2; it must be at least */
+/*                 two due to work space constraints.]] */
+/*               OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0) */
+/*               OPTRATIO = MAX( OPTRATIO, 2 ) */
+		optratio = 2;
+		if (optratio * nr > *n) {
+		    slacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv);
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 
+				+ 2], ldv);
+		    }
+/*                 singular vectors of R stored in [U](1:NR,1:NR) */
+		    i__1 = *n - nr;
+		    slaset_("A", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1]
+			    , ldv);
+		    i__1 = *lwork - *n;
+		    sgesvd_("S", "O", n, n, &v[v_offset], ldv, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], 
+			    &i__1, info);
+		    slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*                 singular vectors of A. The leading N left singular vectors */
+/*                 are in [U](1:N,1:N) */
+/*                 (M x N1), i.e. (M x N) or (M x M). */
+		    if (*n < *m && ! wntuf) {
+			i__1 = *m - *n;
+			slaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + 
+				u_dim1], ldu);
+			if (*n < n1) {
+			    i__1 = n1 - *n;
+			    slaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1)
+				     * u_dim1 + 1], ldu);
+			    i__1 = *m - *n;
+			    i__2 = n1 - *n;
+			    slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n 
+				    + 1 + (*n + 1) * u_dim1], ldu);
+			}
+		    }
+		} else {
+		    slacpy_("U", &nr, n, &a[a_offset], lda, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[nr + 2 
+				+ u_dim1], ldu);
+		    }
+		    i__1 = *lwork - *n - nr;
+		    sgelqf_(&nr, n, &u[nr + 1 + u_dim1], ldu, &work[*n + 1], &
+			    work[*n + nr + 1], &i__1, &ierr);
+		    slacpy_("L", &nr, &nr, &u[nr + 1 + u_dim1], ldu, &v[
+			    v_offset], ldv);
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 
+				<< 1) + 1], ldv);
+		    }
+		    i__1 = *lwork - *n - nr;
+		    sgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, &work[*n + nr 
+			    + 1], &i__1, info);
+		    i__1 = *n - nr;
+		    slaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + 
+			    v_dim1], ldv);
+		    i__1 = *n - nr;
+		    slaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * 
+			    v_dim1 + 1], ldv);
+		    i__1 = *n - nr;
+		    i__2 = *n - nr;
+		    slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + (
+			    nr + 1) * v_dim1], ldv);
+		    i__1 = *lwork - *n - nr;
+		    sormlq_("R", "N", n, n, &nr, &u[nr + 1 + u_dim1], ldu, &
+			    work[*n + 1], &v[v_offset], ldv, &work[*n + nr + 
+			    1], &i__1, &ierr);
+		    slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*              (M x NR) or (M x N) or (M x M). */
+		    if (nr < *m && ! wntuf) {
+			i__1 = *m - nr;
+			slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+				u_dim1], ldu);
+			if (nr < n1) {
+			    i__1 = n1 - nr;
+			    slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 
+				    1) * u_dim1 + 1], ldu);
+			    i__1 = *m - nr;
+			    i__2 = n1 - nr;
+			    slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr 
+				    + 1 + (nr + 1) * u_dim1], ldu);
+			}
+		    }
+		}
+	    }
+	}
+
+/*           The Q matrix from the first QRF is built into the left singular */
+/*           vectors matrix U. */
+
+	if (! wntuf) {
+	    i__1 = *lwork - *n;
+	    sormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[
+		    u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+	}
+	if (rowprm && ! wntuf) {
+	    i__1 = *m - 1;
+	    slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], &
+		    c_n1);
+	}
+
+/*     ... end of the "full SVD" branch */
+    }
+
+/*     Check whether some singular values are returned as zeros, e.g. */
+/*     due to underflow, and update the numerical rank. */
+    p = nr;
+    for (q = p; q >= 1; --q) {
+	if (s[q] > 0.f) {
+	    goto L4002;
+	}
+	--nr;
+/* L4001: */
+    }
+L4002:
+
+/*     singular values are set to zero. */
+    if (nr < *n) {
+	i__1 = *n - nr;
+	slaset_("G", &i__1, &c__1, &c_b72, &c_b72, &s[nr + 1], n);
+    }
+/*     values. */
+    if (ascaled) {
+	r__1 = sqrt((real) (*m));
+	slascl_("G", &c__0, &c__0, &c_b76, &r__1, &nr, &c__1, &s[1], n, &ierr);
+    }
+    if (conda) {
+	rwork[1] = sconda;
+    }
+    rwork[2] = (real) (p - nr);
+/*     exact zeros in SGESVD() applied to the (possibly truncated) */
+/*     full row rank triangular (trapezoidal) factor of A. */
+    *numrank = nr;
+
+    return 0;
+
+/*     End of SGESVDQ */
+
+} /* sgesvdq_ */
+
diff --git a/lapack-netlib/SRC/sgesvdx.c b/lapack-netlib/SRC/sgesvdx.c
new file mode 100644
index 000000000..59c1c95d5
--- /dev/null
+++ b/lapack-netlib/SRC/sgesvdx.c
@@ -0,0 +1,1339 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__6 = 6;
+static integer c__0 = 0;
+static integer c__2 = 2;
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static real c_b109 = 0.f;
+
+/* > \brief <b> SGESVDX computes the singular value decomposition (SVD) for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESVDX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvdx
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvdx
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvdx
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*     SUBROUTINE SGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, */
+/*    $                    IL, IU, NS, S, U, LDU, VT, LDVT, WORK, */
+/*    $                    LWORK, IWORK, INFO ) */
+
+
+/*      CHARACTER          JOBU, JOBVT, RANGE */
+/*      INTEGER            IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS */
+/*      REAL               VL, VU */
+/*     INTEGER            IWORK( * ) */
+/*     REAL               A( LDA, * ), S( * ), U( LDU, * ), */
+/*    $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  SGESVDX computes the singular value decomposition (SVD) of a real */
+/* >  M-by-N matrix A, optionally computing the left and/or right singular */
+/* >  vectors. The SVD is written */
+/* > */
+/* >      A = U * SIGMA * transpose(V) */
+/* > */
+/* >  where SIGMA is an M-by-N matrix which is zero except for its */
+/* >  f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
+/* >  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA */
+/* >  are the singular values of A; they are real and non-negative, and */
+/* >  are returned in descending order.  The first f2cmin(m,n) columns of */
+/* >  U and V are the left and right singular vectors of A. */
+/* > */
+/* >  SGESVDX uses an eigenvalue problem for obtaining the SVD, which */
+/* >  allows for the computation of a subset of singular values and */
+/* >  vectors. See SBDSVDX for details. */
+/* > */
+/* >  Note that the routine returns V**T, not V. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix U: */
+/* >          = 'V':  the first f2cmin(m,n) columns of U (the left singular */
+/* >                  vectors) or as specified by RANGE are returned in */
+/* >                  the array U; */
+/* >          = 'N':  no columns of U (no left singular vectors) are */
+/* >                  computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVT */
+/* > \verbatim */
+/* >          JOBVT is CHARACTER*1 */
+/* >           Specifies options for computing all or part of the matrix */
+/* >           V**T: */
+/* >           = 'V':  the first f2cmin(m,n) rows of V**T (the right singular */
+/* >                   vectors) or as specified by RANGE are returned in */
+/* >                   the array VT; */
+/* >           = 'N':  no rows of V**T (no right singular vectors) are */
+/* >                   computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RANGE */
+/* > \verbatim */
+/* >          RANGE is CHARACTER*1 */
+/* >          = 'A': all singular values will be found. */
+/* >          = 'V': all singular values in the half-open interval (VL,VU] */
+/* >                 will be found. */
+/* >          = 'I': the IL-th through IU-th singular values will be found. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the contents of A are destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] VL */
+/* > \verbatim */
+/* >          VL is REAL */
+/* >          If RANGE='V', the lower bound of the interval to */
+/* >          be searched for singular values. VU > VL. */
+/* >          Not referenced if RANGE = 'A' or 'I'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] VU */
+/* > \verbatim */
+/* >          VU is REAL */
+/* >          If RANGE='V', the upper bound of the interval to */
+/* >          be searched for singular values. VU > VL. */
+/* >          Not referenced if RANGE = 'A' or 'I'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IL */
+/* > \verbatim */
+/* >          IL is INTEGER */
+/* >          If RANGE='I', the index of the */
+/* >          smallest singular value to be returned. */
+/* >          1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */
+/* >          Not referenced if RANGE = 'A' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IU */
+/* > \verbatim */
+/* >          IU is INTEGER */
+/* >          If RANGE='I', the index of the */
+/* >          largest singular value to be returned. */
+/* >          1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */
+/* >          Not referenced if RANGE = 'A' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] NS */
+/* > \verbatim */
+/* >          NS is INTEGER */
+/* >          The total number of singular values found, */
+/* >          0 <= NS <= f2cmin(M,N). */
+/* >          If RANGE = 'A', NS = f2cmin(M,N); if RANGE = 'I', NS = IU-IL+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is REAL array, dimension (f2cmin(M,N)) */
+/* >          The singular values of A, sorted so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is REAL array, dimension (LDU,UCOL) */
+/* >          If JOBU = 'V', U contains columns of U (the left singular */
+/* >          vectors, stored columnwise) as specified by RANGE; if */
+/* >          JOBU = 'N', U is not referenced. */
+/* >          Note: The user must ensure that UCOL >= NS; if RANGE = 'V', */
+/* >          the exact value of NS is not known in advance and an upper */
+/* >          bound must be used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= 1; if */
+/* >          JOBU = 'V', LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VT */
+/* > \verbatim */
+/* >          VT is REAL array, dimension (LDVT,N) */
+/* >          If JOBVT = 'V', VT contains the rows of V**T (the right singular */
+/* >          vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', */
+/* >          VT is not referenced. */
+/* >          Note: The user must ensure that LDVT >= NS; if RANGE = 'V', */
+/* >          the exact value of NS is not known in advance and an upper */
+/* >          bound must be used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT.  LDVT >= 1; if */
+/* >          JOBVT = 'V', LDVT >= NS (see above). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see */
+/* >          comments inside the code): */
+/* >             - PATH 1  (M much larger than N) */
+/* >             - PATH 1t (N much larger than M) */
+/* >          LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (12*MIN(M,N)) */
+/* >          If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, */
+/* >          then IWORK contains the indices of the eigenvectors that failed */
+/* >          to converge in SBDSVDX/SSTEVX. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >     INFO is INTEGER */
+/* >           = 0:  successful exit */
+/* >           < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >           > 0:  if INFO = i, then i eigenvectors failed to converge */
+/* >                 in SBDSVDX/SSTEVX. */
+/* >                 if INFO = N*2 + 1, an internal error occurred in */
+/* >                 SBDSVDX */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEsing */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesvdx_(char *jobu, char *jobvt, char *range, integer *
+	m, integer *n, real *a, integer *lda, real *vl, real *vu, integer *il,
+	 integer *iu, integer *ns, real *s, real *u, integer *ldu, real *vt, 
+	integer *ldvt, real *work, integer *lwork, integer *iwork, integer *
+	info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 
+	    i__2, i__3;
+    char ch__1[2];
+
+    /* Local variables */
+    integer iscl;
+    logical alls, inds;
+    integer ilqf;
+    real anrm;
+    integer ierr, iqrf, itau;
+    char jobz[1];
+    logical vals;
+    integer i__, j;
+    extern logical lsame_(char *, char *);
+    integer iltgk, itemp, minmn, itaup, itauq, iutgk, itgkz, mnthr;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    logical wantu;
+    integer id, ie;
+    extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, integer *, integer *);
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), slascl_(char *, integer 
+	    *, integer *, real *, real *, integer *, integer *, real *, 
+	    integer *, integer *);
+    real abstol;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), slacpy_(char *, integer 
+	    *, integer *, real *, integer *, real *, integer *);
+    char rngtgk[1];
+    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, 
+	    real *, real *, integer *), sormbr_(char *, char *, char *
+	    , integer *, integer *, integer *, real *, integer *, real *, 
+	    real *, integer *, real *, integer *, integer *);
+    integer minwrk, maxwrk;
+    real smlnum;
+    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    logical lquery, wantvt;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    real dum[1], eps;
+    extern /* Subroutine */ int sbdsvdx_(char *, char *, char *, integer *, 
+	    real *, real *, real *, real *, integer *, integer *, integer *, 
+	    real *, real *, integer *, real *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *ns = 0;
+    *info = 0;
+    abstol = slamch_("S") * 2;
+    lquery = *lwork == -1;
+    minmn = f2cmin(*m,*n);
+    wantu = lsame_(jobu, "V");
+    wantvt = lsame_(jobvt, "V");
+    if (wantu || wantvt) {
+	*(unsigned char *)jobz = 'V';
+    } else {
+	*(unsigned char *)jobz = 'N';
+    }
+    alls = lsame_(range, "A");
+    vals = lsame_(range, "V");
+    inds = lsame_(range, "I");
+
+    *info = 0;
+    if (! lsame_(jobu, "V") && ! lsame_(jobu, "N")) {
+	*info = -1;
+    } else if (! lsame_(jobvt, "V") && ! lsame_(jobvt, 
+	    "N")) {
+	*info = -2;
+    } else if (! (alls || vals || inds)) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*m > *lda) {
+	*info = -7;
+    } else if (minmn > 0) {
+	if (vals) {
+	    if (*vl < 0.f) {
+		*info = -8;
+	    } else if (*vu <= *vl) {
+		*info = -9;
+	    }
+	} else if (inds) {
+	    if (*il < 1 || *il > f2cmax(1,minmn)) {
+		*info = -10;
+	    } else if (*iu < f2cmin(minmn,*il) || *iu > minmn) {
+		*info = -11;
+	    }
+	}
+	if (*info == 0) {
+	    if (wantu && *ldu < *m) {
+		*info = -15;
+	    } else if (wantvt) {
+		if (inds) {
+		    if (*ldvt < *iu - *il + 1) {
+			*info = -17;
+		    }
+		} else if (*ldvt < minmn) {
+		    *info = -17;
+		}
+	    }
+	}
+    }
+
+/*     Compute workspace */
+/*     (Note: Comments in the code beginning "Workspace:" describe the */
+/*     minimal amount of workspace needed at that point in the code, */
+/*     as well as the preferred amount for good performance. */
+/*     NB refers to the optimal block size for the immediately */
+/*     following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	if (minmn > 0) {
+	    if (*m >= *n) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = jobu;
+		i__1[1] = 1, a__1[1] = jobvt;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
+			ftnlen)6, (ftnlen)2);
+		if (*m >= mnthr) {
+
+/*                 Path 1 (M much larger than N) */
+
+		    maxwrk = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * (*n + 5) + (*n << 1) * ilaenv_(
+			    &c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * 
+				ilaenv_(&c__1, "SORMQR", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * 
+				ilaenv_(&c__1, "SORMLQ", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    minwrk = *n * (*n * 3 + 20);
+		} else {
+
+/*                 Path 2 (M at least N, but not much larger) */
+
+		    maxwrk = (*n << 2) + (*m + *n) * ilaenv_(&c__1, "SGEBRD", 
+			    " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * 
+				ilaenv_(&c__1, "SORMQR", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * 
+				ilaenv_(&c__1, "SORMLQ", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+/* Computing MAX */
+		    i__2 = *n * ((*n << 1) + 19), i__3 = (*n << 2) + *m;
+		    minwrk = f2cmax(i__2,i__3);
+		}
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = jobu;
+		i__1[1] = 1, a__1[1] = jobvt;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
+			ftnlen)6, (ftnlen)2);
+		if (*n >= mnthr) {
+
+/*                 Path 1t (N much larger than M) */
+
+		    maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * (*m + 5) + (*m << 1) * ilaenv_(
+			    &c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * 
+				ilaenv_(&c__1, "SORMQR", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * 
+				ilaenv_(&c__1, "SORMLQ", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    minwrk = *m * (*m * 3 + 20);
+		} else {
+
+/*                 Path 2t (N at least M, but not much larger) */
+
+		    maxwrk = (*m << 2) + (*m + *n) * ilaenv_(&c__1, "SGEBRD", 
+			    " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * 
+				ilaenv_(&c__1, "SORMQR", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * 
+				ilaenv_(&c__1, "SORMLQ", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+/* Computing MAX */
+		    i__2 = *m * ((*m << 1) + 19), i__3 = (*m << 2) + *n;
+		    minwrk = f2cmax(i__2,i__3);
+		}
+	    }
+	}
+	maxwrk = f2cmax(maxwrk,minwrk);
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -19;
+	}
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("SGESVDX", &i__2, (ftnlen)7);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Set singular values indices accord to RANGE. */
+
+    if (alls) {
+	*(unsigned char *)rngtgk = 'I';
+	iltgk = 1;
+	iutgk = f2cmin(*m,*n);
+    } else if (inds) {
+	*(unsigned char *)rngtgk = 'I';
+	iltgk = *il;
+	iutgk = *iu;
+    } else {
+	*(unsigned char *)rngtgk = 'V';
+	iltgk = 0;
+	iutgk = 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = sqrt(slamch_("S")) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+    }
+
+    if (*m >= *n) {
+
+/*        A has at least as many rows as columns. If A has sufficiently */
+/*        more rows than columns, first reduce A using the QR */
+/*        decomposition. */
+
+	if (*m >= mnthr) {
+
+/*           Path 1 (M much larger than N): */
+/*           A = Q * R = Q * ( QB * B * PB**T ) */
+/*                     = Q * ( QB * ( UB * S * VB**T ) * PB**T ) */
+/*           U = Q * QB * UB; V**T = VB**T * PB**T */
+
+/*           Compute A=Q*R */
+/*           (Workspace: need 2*N, prefer N+N*NB) */
+
+	    itau = 1;
+	    itemp = itau + *n;
+	    i__2 = *lwork - itemp + 1;
+	    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2,
+		     info);
+
+/*           Copy R into WORK and bidiagonalize it: */
+/*           (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB) */
+
+	    iqrf = itemp;
+	    id = iqrf + *n * *n;
+	    ie = id + *n;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    itemp = itaup + *n;
+	    slacpy_("U", n, n, &a[a_offset], lda, &work[iqrf], n);
+	    i__2 = *n - 1;
+	    i__3 = *n - 1;
+	    slaset_("L", &i__2, &i__3, &c_b109, &c_b109, &work[iqrf + 1], n);
+	    i__2 = *lwork - itemp + 1;
+	    sgebrd_(n, n, &work[iqrf], n, &work[id], &work[ie], &work[itauq], 
+		    &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 14*N + 2*N*(N+1)) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *n * ((*n << 1) + 1);
+	    i__2 = *n << 1;
+	    sbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors. */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *n << 1;
+		}
+		i__2 = *m - *n;
+		slaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1],
+			 ldu);
+
+/*              Call SORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("Q", "L", "N", n, ns, n, &work[iqrf], n, &work[itauq],
+			 &u[u_offset], ldu, &work[itemp], &i__2, info);
+
+/*              Call SORMQR to compute Q*(QB*UB). */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormqr_("L", "N", m, ns, n, &a[a_offset], lda, &work[itau], &
+			u[u_offset], ldu, &work[itemp], &i__2, info);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *n;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *n << 1;
+		}
+
+/*              Call SORMBR to compute VB**T * PB**T */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("P", "R", "T", ns, n, n, &work[iqrf], n, &work[itaup],
+			 &vt[vt_offset], ldvt, &work[itemp], &i__2, info);
+	    }
+	} else {
+
+/*           Path 2 (M at least N, but not much larger) */
+/*           Reduce A to bidiagonal form without QR decomposition */
+/*           A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */
+/*           U = QB * UB; V**T = VB**T * PB**T */
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB) */
+
+	    id = 1;
+	    ie = id + *n;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    itemp = itaup + *n;
+	    i__2 = *lwork - itemp + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[
+		    itauq], &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 14*N + 2*N*(N+1)) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *n * ((*n << 1) + 1);
+	    i__2 = *n << 1;
+	    sbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors. */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *n << 1;
+		}
+		i__2 = *m - *n;
+		slaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1],
+			 ldu);
+
+/*              Call SORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[itemp], &i__2, &ierr);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *n;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *n << 1;
+		}
+
+/*              Call SORMBR to compute VB**T * PB**T */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("P", "R", "T", ns, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, &
+			ierr);
+	    }
+	}
+    } else {
+
+/*        A has more columns than rows. If A has sufficiently more */
+/*        columns than rows, first reduce A using the LQ decomposition. */
+
+	if (*n >= mnthr) {
+
+/*           Path 1t (N much larger than M): */
+/*           A = L * Q = ( QB * B * PB**T ) * Q */
+/*                     = ( QB * ( UB * S * VB**T ) * PB**T ) * Q */
+/*           U = QB * UB ; V**T = VB**T * PB**T * Q */
+
+/*           Compute A=L*Q */
+/*           (Workspace: need 2*M, prefer M+M*NB) */
+
+	    itau = 1;
+	    itemp = itau + *m;
+	    i__2 = *lwork - itemp + 1;
+	    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2,
+		     info);
+/*           Copy L into WORK and bidiagonalize it: */
+/*           (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB) */
+
+	    ilqf = itemp;
+	    id = ilqf + *m * *m;
+	    ie = id + *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    itemp = itaup + *m;
+	    slacpy_("L", m, m, &a[a_offset], lda, &work[ilqf], m);
+	    i__2 = *m - 1;
+	    i__3 = *m - 1;
+	    slaset_("U", &i__2, &i__3, &c_b109, &c_b109, &work[ilqf + *m], m);
+	    i__2 = *lwork - itemp + 1;
+	    sgebrd_(m, m, &work[ilqf], m, &work[id], &work[ie], &work[itauq], 
+		    &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 2*M*M+14*M) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *m * ((*m << 1) + 1);
+	    i__2 = *m << 1;
+	    sbdsvdx_("U", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors. */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *m << 1;
+		}
+
+/*              Call SORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("Q", "L", "N", m, ns, m, &work[ilqf], m, &work[itauq],
+			 &u[u_offset], ldu, &work[itemp], &i__2, info);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *m;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *m << 1;
+		}
+		i__2 = *n - *m;
+		slaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * 
+			vt_dim1 + 1], ldvt);
+
+/*              Call SORMBR to compute (VB**T)*(PB**T) */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("P", "R", "T", ns, m, m, &work[ilqf], m, &work[itaup],
+			 &vt[vt_offset], ldvt, &work[itemp], &i__2, info);
+
+/*              Call SORMLQ to compute ((VB**T)*(PB**T))*Q. */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormlq_("R", "N", ns, n, m, &a[a_offset], lda, &work[itau], &
+			vt[vt_offset], ldvt, &work[itemp], &i__2, info);
+	    }
+	} else {
+
+/*           Path 2t (N greater than M, but not much larger) */
+/*           Reduce to bidiagonal form without LQ decomposition */
+/*           A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */
+/*           U = QB * UB; V**T = VB**T * PB**T */
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB) */
+
+	    id = 1;
+	    ie = id + *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    itemp = itaup + *m;
+	    i__2 = *lwork - itemp + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[
+		    itauq], &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 2*M*M+14*M) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *m * ((*m << 1) + 1);
+	    i__2 = *m << 1;
+	    sbdsvdx_("L", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors. */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *m << 1;
+		}
+
+/*              Call SORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[itemp], &i__2, info);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *m;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    scopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *m << 1;
+		}
+		i__2 = *n - *m;
+		slaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * 
+			vt_dim1 + 1], ldvt);
+
+/*              Call SORMBR to compute VB**T * PB**T */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		sormbr_("P", "R", "T", ns, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, 
+			info);
+	    }
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, info);
+	}
+	if (anrm < smlnum) {
+	    slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, info);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (real) maxwrk;
+
+    return 0;
+
+/*     End of SGESVDX */
+
+} /* sgesvdx_ */
+
diff --git a/lapack-netlib/SRC/sgesvj.c b/lapack-netlib/SRC/sgesvj.c
new file mode 100644
index 000000000..32f49143c
--- /dev/null
+++ b/lapack-netlib/SRC/sgesvj.c
@@ -0,0 +1,2210 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static real c_b17 = 0.f;
+static real c_b18 = 1.f;
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c__2 = 2;
+
+/* > \brief \b SGESVJ */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESVJ + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvj.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvj.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvj.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, */
+/*                          LDV, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDV, LWORK, M, MV, N */
+/*       CHARACTER*1        JOBA, JOBU, JOBV */
+/*       REAL               A( LDA, * ), SVA( N ), V( LDV, * ), */
+/*      $                   WORK( LWORK ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGESVJ computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, where M >= N. The SVD of A is written as */
+/* >                                    [++]   [xx]   [x0]   [xx] */
+/* >              A = U * SIGMA * V^t,  [++] = [xx] * [ox] * [xx] */
+/* >                                    [++]   [xx] */
+/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */
+/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */
+/* > of SIGMA are the singular values of A. The columns of U and V are the */
+/* > left and the right singular vectors of A, respectively. */
+/* > SGESVJ can sometimes compute tiny singular values and their singular vectors much */
+/* > more accurately than other SVD routines, see below under Further Details. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBA */
+/* > \verbatim */
+/* >          JOBA is CHARACTER*1 */
+/* >          Specifies the structure of A. */
+/* >          = 'L': The input matrix A is lower triangular; */
+/* >          = 'U': The input matrix A is upper triangular; */
+/* >          = 'G': The input matrix A is general M-by-N matrix, M >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          Specifies whether to compute the left singular vectors */
+/* >          (columns of U): */
+/* >          = 'U': The left singular vectors corresponding to the nonzero */
+/* >                 singular values are computed and returned in the leading */
+/* >                 columns of A. See more details in the description of A. */
+/* >                 The default numerical orthogonality threshold is set to */
+/* >                 approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=SLAMCH('E'). */
+/* >          = 'C': Analogous to JOBU='U', except that user can control the */
+/* >                 level of numerical orthogonality of the computed left */
+/* >                 singular vectors. TOL can be set to TOL = CTOL*EPS, where */
+/* >                 CTOL is given on input in the array WORK. */
+/* >                 No CTOL smaller than ONE is allowed. CTOL greater */
+/* >                 than 1 / EPS is meaningless. The option 'C' */
+/* >                 can be used if M*EPS is satisfactory orthogonality */
+/* >                 of the computed left singular vectors, so CTOL=M could */
+/* >                 save few sweeps of Jacobi rotations. */
+/* >                 See the descriptions of A and WORK(1). */
+/* >          = 'N': The matrix U is not computed. However, see the */
+/* >                 description of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          Specifies whether to compute the right singular vectors, that */
+/* >          is, the matrix V: */
+/* >          = 'V':  the matrix V is computed and returned in the array V */
+/* >          = 'A':  the Jacobi rotations are applied to the MV-by-N */
+/* >                  array V. In other words, the right singular vector */
+/* >                  matrix V is not computed explicitly; instead it is */
+/* >                  applied to an MV-by-N matrix initially stored in the */
+/* >                  first MV rows of V. */
+/* >          = 'N':  the matrix V is not computed and the array V is not */
+/* >                  referenced */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A. 1/SLAMCH('E') > M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A. */
+/* >          M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          If JOBU = 'U' .OR. JOBU = 'C': */
+/* >                 If INFO = 0: */
+/* >                 RANKA orthonormal columns of U are returned in the */
+/* >                 leading RANKA columns of the array A. Here RANKA <= N */
+/* >                 is the number of computed singular values of A that are */
+/* >                 above the underflow threshold SLAMCH('S'). The singular */
+/* >                 vectors corresponding to underflowed or zero singular */
+/* >                 values are not computed. The value of RANKA is returned */
+/* >                 in the array WORK as RANKA=NINT(WORK(2)). Also see the */
+/* >                 descriptions of SVA and WORK. The computed columns of U */
+/* >                 are mutually numerically orthogonal up to approximately */
+/* >                 TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'), */
+/* >                 see the description of JOBU. */
+/* >                 If INFO > 0, */
+/* >                 the procedure SGESVJ did not converge in the given number */
+/* >                 of iterations (sweeps). In that case, the computed */
+/* >                 columns of U may not be orthogonal up to TOL. The output */
+/* >                 U (stored in A), SIGMA (given by the computed singular */
+/* >                 values in SVA(1:N)) and V is still a decomposition of the */
+/* >                 input matrix A in the sense that the residual */
+/* >                 ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. */
+/* >          If JOBU = 'N': */
+/* >                 If INFO = 0: */
+/* >                 Note that the left singular vectors are 'for free' in the */
+/* >                 one-sided Jacobi SVD algorithm. However, if only the */
+/* >                 singular values are needed, the level of numerical */
+/* >                 orthogonality of U is not an issue and iterations are */
+/* >                 stopped when the columns of the iterated matrix are */
+/* >                 numerically orthogonal up to approximately M*EPS. Thus, */
+/* >                 on exit, A contains the columns of U scaled with the */
+/* >                 corresponding singular values. */
+/* >                 If INFO > 0: */
+/* >                 the procedure SGESVJ did not converge in the given number */
+/* >                 of iterations (sweeps). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SVA */
+/* > \verbatim */
+/* >          SVA is REAL array, dimension (N) */
+/* >          On exit, */
+/* >          If INFO = 0 : */
+/* >          depending on the value SCALE = WORK(1), we have: */
+/* >                 If SCALE = ONE: */
+/* >                 SVA(1:N) contains the computed singular values of A. */
+/* >                 During the computation SVA contains the Euclidean column */
+/* >                 norms of the iterated matrices in the array A. */
+/* >                 If SCALE .NE. ONE: */
+/* >                 The singular values of A are SCALE*SVA(1:N), and this */
+/* >                 factored representation is due to the fact that some of the */
+/* >                 singular values of A might underflow or overflow. */
+/* > */
+/* >          If INFO > 0 : */
+/* >          the procedure SGESVJ did not converge in the given number of */
+/* >          iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MV */
+/* > \verbatim */
+/* >          MV is INTEGER */
+/* >          If JOBV = 'A', then the product of Jacobi rotations in SGESVJ */
+/* >          is applied to the first MV rows of V. See the description of JOBV. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,N) */
+/* >          If JOBV = 'V', then V contains on exit the N-by-N matrix of */
+/* >                         the right singular vectors; */
+/* >          If JOBV = 'A', then V contains the product of the computed right */
+/* >                         singular vector matrix and the initial matrix in */
+/* >                         the array V. */
+/* >          If JOBV = 'N', then V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V, LDV >= 1. */
+/* >          If JOBV = 'V', then LDV >= f2cmax(1,N). */
+/* >          If JOBV = 'A', then LDV >= f2cmax(1,MV) . */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (LWORK) */
+/* >          On entry, */
+/* >          If JOBU = 'C' : */
+/* >          WORK(1) = CTOL, where CTOL defines the threshold for convergence. */
+/* >                    The process stops if all columns of A are mutually */
+/* >                    orthogonal up to CTOL*EPS, EPS=SLAMCH('E'). */
+/* >                    It is required that CTOL >= ONE, i.e. it is not */
+/* >                    allowed to force the routine to obtain orthogonality */
+/* >                    below EPSILON. */
+/* >          On exit, */
+/* >          WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */
+/* >                    are the computed singular vcalues of A. */
+/* >                    (See description of SVA().) */
+/* >          WORK(2) = NINT(WORK(2)) is the number of the computed nonzero */
+/* >                    singular values. */
+/* >          WORK(3) = NINT(WORK(3)) is the number of the computed singular */
+/* >                    values that are larger than the underflow threshold. */
+/* >          WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi */
+/* >                    rotations needed for numerical convergence. */
+/* >          WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */
+/* >                    This is useful information in cases when SGESVJ did */
+/* >                    not converge, as it can be used to estimate whether */
+/* >                    the output is still useful and for post festum analysis. */
+/* >          WORK(6) = the largest absolute value over all sines of the */
+/* >                    Jacobi rotation angles in the last sweep. It can be */
+/* >                    useful for a post festum analysis. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >         length of WORK, WORK >= MAX(6,M+N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, then the i-th argument had an illegal value */
+/* >          > 0:  SGESVJ did not converge in the maximal allowed number (30) */
+/* >                of sweeps. The output may still be useful. See the */
+/* >                description of WORK. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */
+/* > rotations. The rotations are implemented as fast scaled rotations of */
+/* > Anda and Park [1]. In the case of underflow of the Jacobi angle, a */
+/* > modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses */
+/* > column interchanges of de Rijk [2]. The relative accuracy of the computed */
+/* > singular values and the accuracy of the computed singular vectors (in */
+/* > angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. */
+/* > The condition number that determines the accuracy in the full rank case */
+/* > is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */
+/* > spectral condition number. The best performance of this Jacobi SVD */
+/* > procedure is achieved if used in an  accelerated version of Drmac and */
+/* > Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. */
+/* > Some tunning parameters (marked with [TP]) are available for the */
+/* > implementer. \n */
+/* > The computational range for the nonzero singular values is the  machine */
+/* > number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */
+/* > denormalized singular values can be computed with the corresponding */
+/* > gradual loss of accurate digits. */
+/* > */
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */
+/* > */
+/* > \par References: */
+/*  ================ */
+/* > */
+/* > [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. \n */
+/* >    SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. \n\n */
+/* > [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */
+/* >    singular value decomposition on a vector computer. \n */
+/* >    SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. \n\n */
+/* > [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. \n */
+/* > [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular */
+/* >    value computation in floating point arithmetic. \n */
+/* >    SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. \n\n */
+/* > [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. \n */
+/* >    SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. \n */
+/* >    LAPACK Working note 169. \n\n */
+/* > [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. \n */
+/* >    SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. \n */
+/* >    LAPACK Working note 170. \n\n */
+/* > [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */
+/* >    QSVD, (H,K)-SVD computations.\n */
+/* >    Department of Mathematics, University of Zagreb, 2008. */
+/* > */
+/* > \par Bugs, Examples and Comments: */
+/*  ================================= */
+/* > */
+/* > Please report all bugs and send interesting test examples and comments to */
+/* > drmac@math.hr. Thank you. */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesvj_(char *joba, char *jobu, char *jobv, integer *m, 
+	integer *n, real *a, integer *lda, real *sva, integer *mv, real *v, 
+	integer *ldv, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
+    real r__1, r__2;
+
+    /* Local variables */
+    real aapp, aapq, aaqq, ctol;
+    integer ierr;
+    real bigtheta;
+    extern real sdot_(integer *, real *, integer *, real *, integer *);
+    integer pskipped;
+    real aapp0, temp1;
+    extern real snrm2_(integer *, real *, integer *);
+    integer i__, p, q;
+    real t, large, apoaq, aqoap;
+    extern logical lsame_(char *, char *);
+    real theta;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    real small, sfmin;
+    logical lsvec;
+    real fastr[5], epsln;
+    logical applv, rsvec, uctol, lower, upper;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    logical rotok;
+    integer n2;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *);
+    integer n4;
+    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
+	    real *, integer *), srotm_(integer *, real *, integer *, real *, 
+	    integer *, real *);
+    real rootsfmin;
+    extern /* Subroutine */ int sgsvj0_(char *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, real *, integer *, real *, integer *, integer *), 
+	    sgsvj1_(char *, integer *, integer *, integer *, real *, integer *
+	    , real *, real *, integer *, real *, integer *, real *, real *, 
+	    real *, integer *, real *, integer *, integer *);
+    integer n34;
+    real cs, sn;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    integer ijblsk, swband;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    integer blskip;
+    real mxaapq;
+    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, 
+	    real *, real *, integer *);
+    real thsign;
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 
+	    real *);
+    real mxsinj;
+    integer ir1, emptsw, notrot, iswrot, jbc;
+    real big;
+    integer kbl, lkahead, igl, ibr, jgl, nbl;
+    real skl;
+    logical goscale;
+    real tol;
+    integer mvl;
+    logical noscale;
+    real rootbig, rooteps;
+    integer rowskip;
+    real roottol;
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+/*     from BLAS */
+/*     from LAPACK */
+/*     from BLAS */
+/*     from LAPACK */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    --sva;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --work;
+
+    /* Function Body */
+    lsvec = lsame_(jobu, "U");
+    uctol = lsame_(jobu, "C");
+    rsvec = lsame_(jobv, "V");
+    applv = lsame_(jobv, "A");
+    upper = lsame_(joba, "U");
+    lower = lsame_(joba, "L");
+
+    if (! (upper || lower || lsame_(joba, "G"))) {
+	*info = -1;
+    } else if (! (lsvec || uctol || lsame_(jobu, "N"))) 
+	    {
+	*info = -2;
+    } else if (! (rsvec || applv || lsame_(jobv, "N"))) 
+	    {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0 || *n > *m) {
+	*info = -5;
+    } else if (*lda < *m) {
+	*info = -7;
+    } else if (*mv < 0) {
+	*info = -9;
+    } else if (rsvec && *ldv < *n || applv && *ldv < *mv) {
+	*info = -11;
+    } else if (uctol && work[1] <= 1.f) {
+	*info = -12;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = *m + *n;
+	if (*lwork < f2cmax(i__1,6)) {
+	    *info = -13;
+	} else {
+	    *info = 0;
+	}
+    }
+
+/*     #:( */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESVJ", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/* #:) Quick return for void matrix */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Set numerical parameters */
+/*     The stopping criterion for Jacobi rotations is */
+
+/*     max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS */
+
+/*     where EPS is the round-off and CTOL is defined as follows: */
+
+    if (uctol) {
+/*        ... user controlled */
+	ctol = work[1];
+    } else {
+/*        ... default */
+	if (lsvec || rsvec || applv) {
+	    ctol = sqrt((real) (*m));
+	} else {
+	    ctol = (real) (*m);
+	}
+    }
+/*     ... and the machine dependent parameters are */
+/* [!]  (Make sure that SLAMCH() works properly on the target machine.) */
+
+    epsln = slamch_("Epsilon");
+    rooteps = sqrt(epsln);
+    sfmin = slamch_("SafeMinimum");
+    rootsfmin = sqrt(sfmin);
+    small = sfmin / epsln;
+    big = slamch_("Overflow");
+/*     BIG         = ONE    / SFMIN */
+    rootbig = 1.f / rootsfmin;
+    large = big / sqrt((real) (*m * *n));
+    bigtheta = 1.f / rooteps;
+
+    tol = ctol * epsln;
+    roottol = sqrt(tol);
+
+    if ((real) (*m) * epsln >= 1.f) {
+	*info = -4;
+	i__1 = -(*info);
+	xerbla_("SGESVJ", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Initialize the right singular vector matrix. */
+
+    if (rsvec) {
+	mvl = *n;
+	slaset_("A", &mvl, n, &c_b17, &c_b18, &v[v_offset], ldv);
+    } else if (applv) {
+	mvl = *mv;
+    }
+    rsvec = rsvec || applv;
+
+/*     Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */
+/* (!)  If necessary, scale A to protect the largest singular value */
+/*     from overflow. It is possible that saving the largest singular */
+/*     value destroys the information about the small ones. */
+/*     This initial scaling is almost minimal in the sense that the */
+/*     goal is to make sure that no column norm overflows, and that */
+/*     SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */
+/*     in A are detected, the procedure returns with INFO=-6. */
+
+    skl = 1.f / sqrt((real) (*m) * (real) (*n));
+    noscale = TRUE_;
+    goscale = TRUE_;
+
+    if (lower) {
+/*        the input matrix is M-by-N lower triangular (trapezoidal) */
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    aapp = 0.f;
+	    aaqq = 1.f;
+	    i__2 = *m - p + 1;
+	    slassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq);
+	    if (aapp > big) {
+		*info = -6;
+		i__2 = -(*info);
+		xerbla_("SGESVJ", &i__2, (ftnlen)6);
+		return 0;
+	    }
+	    aaqq = sqrt(aaqq);
+	    if (aapp < big / aaqq && noscale) {
+		sva[p] = aapp * aaqq;
+	    } else {
+		noscale = FALSE_;
+		sva[p] = aapp * (aaqq * skl);
+		if (goscale) {
+		    goscale = FALSE_;
+		    i__2 = p - 1;
+		    for (q = 1; q <= i__2; ++q) {
+			sva[q] *= skl;
+/* L1873: */
+		    }
+		}
+	    }
+/* L1874: */
+	}
+    } else if (upper) {
+/*        the input matrix is M-by-N upper triangular (trapezoidal) */
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    aapp = 0.f;
+	    aaqq = 1.f;
+	    slassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
+	    if (aapp > big) {
+		*info = -6;
+		i__2 = -(*info);
+		xerbla_("SGESVJ", &i__2, (ftnlen)6);
+		return 0;
+	    }
+	    aaqq = sqrt(aaqq);
+	    if (aapp < big / aaqq && noscale) {
+		sva[p] = aapp * aaqq;
+	    } else {
+		noscale = FALSE_;
+		sva[p] = aapp * (aaqq * skl);
+		if (goscale) {
+		    goscale = FALSE_;
+		    i__2 = p - 1;
+		    for (q = 1; q <= i__2; ++q) {
+			sva[q] *= skl;
+/* L2873: */
+		    }
+		}
+	    }
+/* L2874: */
+	}
+    } else {
+/*        the input matrix is M-by-N general dense */
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    aapp = 0.f;
+	    aaqq = 1.f;
+	    slassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
+	    if (aapp > big) {
+		*info = -6;
+		i__2 = -(*info);
+		xerbla_("SGESVJ", &i__2, (ftnlen)6);
+		return 0;
+	    }
+	    aaqq = sqrt(aaqq);
+	    if (aapp < big / aaqq && noscale) {
+		sva[p] = aapp * aaqq;
+	    } else {
+		noscale = FALSE_;
+		sva[p] = aapp * (aaqq * skl);
+		if (goscale) {
+		    goscale = FALSE_;
+		    i__2 = p - 1;
+		    for (q = 1; q <= i__2; ++q) {
+			sva[q] *= skl;
+/* L3873: */
+		    }
+		}
+	    }
+/* L3874: */
+	}
+    }
+
+    if (noscale) {
+	skl = 1.f;
+    }
+
+/*     Move the smaller part of the spectrum from the underflow threshold */
+/* (!)  Start by determining the position of the nonzero entries of the */
+/*     array SVA() relative to ( SFMIN, BIG ). */
+
+    aapp = 0.f;
+    aaqq = big;
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	if (sva[p] != 0.f) {
+/* Computing MIN */
+	    r__1 = aaqq, r__2 = sva[p];
+	    aaqq = f2cmin(r__1,r__2);
+	}
+/* Computing MAX */
+	r__1 = aapp, r__2 = sva[p];
+	aapp = f2cmax(r__1,r__2);
+/* L4781: */
+    }
+
+/* #:) Quick return for zero matrix */
+
+    if (aapp == 0.f) {
+	if (lsvec) {
+	    slaset_("G", m, n, &c_b17, &c_b18, &a[a_offset], lda);
+	}
+	work[1] = 1.f;
+	work[2] = 0.f;
+	work[3] = 0.f;
+	work[4] = 0.f;
+	work[5] = 0.f;
+	work[6] = 0.f;
+	return 0;
+    }
+
+/* #:) Quick return for one-column matrix */
+
+    if (*n == 1) {
+	if (lsvec) {
+	    slascl_("G", &c__0, &c__0, &sva[1], &skl, m, &c__1, &a[a_dim1 + 1]
+		    , lda, &ierr);
+	}
+	work[1] = 1.f / skl;
+	if (sva[1] >= sfmin) {
+	    work[2] = 1.f;
+	} else {
+	    work[2] = 0.f;
+	}
+	work[3] = 0.f;
+	work[4] = 0.f;
+	work[5] = 0.f;
+	work[6] = 0.f;
+	return 0;
+    }
+
+/*     Protect small singular values from underflow, and try to */
+/*     avoid underflows/overflows in computing Jacobi rotations. */
+
+    sn = sqrt(sfmin / epsln);
+    temp1 = sqrt(big / (real) (*n));
+    if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) {
+/* Computing MIN */
+	r__1 = big, r__2 = temp1 / aapp;
+	temp1 = f2cmin(r__1,r__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else if (aaqq <= sn && aapp <= temp1) {
+/* Computing MIN */
+	r__1 = sn / aaqq, r__2 = big / (aapp * sqrt((real) (*n)));
+	temp1 = f2cmin(r__1,r__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else if (aaqq >= sn && aapp >= temp1) {
+/* Computing MAX */
+	r__1 = sn / aaqq, r__2 = temp1 / aapp;
+	temp1 = f2cmax(r__1,r__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else if (aaqq <= sn && aapp >= temp1) {
+/* Computing MIN */
+	r__1 = sn / aaqq, r__2 = big / (sqrt((real) (*n)) * aapp);
+	temp1 = f2cmin(r__1,r__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else {
+	temp1 = 1.f;
+    }
+
+/*     Scale, if necessary */
+
+    if (temp1 != 1.f) {
+	slascl_("G", &c__0, &c__0, &c_b18, &temp1, n, &c__1, &sva[1], n, &
+		ierr);
+    }
+    skl = temp1 * skl;
+    if (skl != 1.f) {
+	slascl_(joba, &c__0, &c__0, &c_b18, &skl, m, n, &a[a_offset], lda, &
+		ierr);
+	skl = 1.f / skl;
+    }
+
+/*     Row-cyclic Jacobi SVD algorithm with column pivoting */
+
+    emptsw = *n * (*n - 1) / 2;
+    notrot = 0;
+    fastr[0] = 0.f;
+
+/*     A is represented in factored form A = A * diag(WORK), where diag(WORK) */
+/*     is initialized to identity. WORK is updated during fast scaled */
+/*     rotations. */
+
+    i__1 = *n;
+    for (q = 1; q <= i__1; ++q) {
+	work[q] = 1.f;
+/* L1868: */
+    }
+
+
+    swband = 3;
+/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */
+/*     if SGESVJ is used as a computational routine in the preconditioned */
+/*     Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure */
+/*     works on pivots inside a band-like region around the diagonal. */
+/*     The boundaries are determined dynamically, based on the number of */
+/*     pivots above a threshold. */
+
+    kbl = f2cmin(8,*n);
+/* [TP] KBL is a tuning parameter that defines the tile size in the */
+/*     tiling of the p-q loops of pivot pairs. In general, an optimal */
+/*     value of KBL depends on the matrix dimensions and on the */
+/*     parameters of the computer's memory. */
+
+    nbl = *n / kbl;
+    if (nbl * kbl != *n) {
+	++nbl;
+    }
+
+/* Computing 2nd power */
+    i__1 = kbl;
+    blskip = i__1 * i__1;
+/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */
+
+    rowskip = f2cmin(5,kbl);
+/* [TP] ROWSKIP is a tuning parameter. */
+
+    lkahead = 1;
+/* [TP] LKAHEAD is a tuning parameter. */
+
+/*     Quasi block transformations, using the lower (upper) triangular */
+/*     structure of the input matrix. The quasi-block-cycling usually */
+/*     invokes cubic convergence. Big part of this cycle is done inside */
+/*     canonical subspaces of dimensions less than M. */
+
+/* Computing MAX */
+    i__1 = 64, i__2 = kbl << 2;
+    if ((lower || upper) && *n > f2cmax(i__1,i__2)) {
+/* [TP] The number of partition levels and the actual partition are */
+/*     tuning parameters. */
+	n4 = *n / 4;
+	n2 = *n / 2;
+	n34 = n4 * 3;
+	if (applv) {
+	    q = 0;
+	} else {
+	    q = 1;
+	}
+
+	if (lower) {
+
+/*     This works very well on lower triangular matrices, in particular */
+/*     in the framework of the preconditioned Jacobi SVD (xGEJSV). */
+/*     The idea is simple: */
+/*     [+ 0 0 0]   Note that Jacobi transformations of [0 0] */
+/*     [+ + 0 0]                                       [0 0] */
+/*     [+ + x 0]   actually work on [x 0]              [x 0] */
+/*     [+ + x x]                    [x x].             [x x] */
+
+	    i__1 = *m - n34;
+	    i__2 = *n - n34;
+	    i__3 = *lwork - *n;
+	    sgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda,
+		     &work[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + (
+		    n34 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &
+		    work[*n + 1], &i__3, &ierr);
+
+	    i__1 = *m - n2;
+	    i__2 = n34 - n2;
+	    i__3 = *lwork - *n;
+	    sgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, &
+		    work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1)
+		     * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &work[*n + 
+		    1], &i__3, &ierr);
+
+	    i__1 = *m - n2;
+	    i__2 = *n - n2;
+	    i__3 = *lwork - *n;
+	    sgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], 
+		    lda, &work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (
+		    n2 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &
+		    work[*n + 1], &i__3, &ierr);
+
+	    i__1 = *m - n4;
+	    i__2 = n2 - n4;
+	    i__3 = *lwork - *n;
+	    sgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, &
+		    work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1)
+		     * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 
+		    1], &i__3, &ierr);
+
+	    i__1 = *lwork - *n;
+	    sgsvj0_(jobv, m, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, 
+		    &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n 
+		    + 1], &i__1, &ierr);
+
+	    i__1 = *lwork - *n;
+	    sgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], &
+		    mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &
+		    work[*n + 1], &i__1, &ierr);
+
+
+	} else if (upper) {
+
+
+	    i__1 = *lwork - *n;
+	    sgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &work[1], &sva[1], &
+		    mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__2, &
+		    work[*n + 1], &i__1, &ierr);
+
+	    i__1 = *lwork - *n;
+	    sgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &work[n4 
+		    + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * 
+		    v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], 
+		    &i__1, &ierr);
+
+	    i__1 = *lwork - *n;
+	    sgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1],
+		     &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &
+		    work[*n + 1], &i__1, &ierr);
+
+	    i__1 = n2 + n4;
+	    i__2 = *lwork - *n;
+	    sgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &work[
+		    n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * 
+		    v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], 
+		    &i__2, &ierr);
+	}
+
+    }
+
+
+    for (i__ = 1; i__ <= 30; ++i__) {
+
+
+	mxaapq = 0.f;
+	mxsinj = 0.f;
+	iswrot = 0;
+
+	notrot = 0;
+	pskipped = 0;
+
+/*     Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */
+/*     1 <= p < q <= N. This is the first step toward a blocked implementation */
+/*     of the rotations. New implementation, based on block transformations, */
+/*     is under development. */
+
+	i__1 = nbl;
+	for (ibr = 1; ibr <= i__1; ++ibr) {
+
+	    igl = (ibr - 1) * kbl + 1;
+
+/* Computing MIN */
+	    i__3 = lkahead, i__4 = nbl - ibr;
+	    i__2 = f2cmin(i__3,i__4);
+	    for (ir1 = 0; ir1 <= i__2; ++ir1) {
+
+		igl += ir1 * kbl;
+
+/* Computing MIN */
+		i__4 = igl + kbl - 1, i__5 = *n - 1;
+		i__3 = f2cmin(i__4,i__5);
+		for (p = igl; p <= i__3; ++p) {
+
+
+		    i__4 = *n - p + 1;
+		    q = isamax_(&i__4, &sva[p], &c__1) + p - 1;
+		    if (p != q) {
+			sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 
+				1], &c__1);
+			if (rsvec) {
+			    sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				    v_dim1 + 1], &c__1);
+			}
+			temp1 = sva[p];
+			sva[p] = sva[q];
+			sva[q] = temp1;
+			temp1 = work[p];
+			work[p] = work[q];
+			work[q] = temp1;
+		    }
+
+		    if (ir1 == 0) {
+
+/*        Column norms are periodically updated by explicit */
+/*        norm computation. */
+/*        Caveat: */
+/*        Unfortunately, some BLAS implementations compute SNRM2(M,A(1,p),1) */
+/*        as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to */
+/*        overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to */
+/*        underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */
+/*        Hence, SNRM2 cannot be trusted, not even in the case when */
+/*        the true norm is far from the under(over)flow boundaries. */
+/*        If properly implemented SNRM2 is available, the IF-THEN-ELSE */
+/*        below should read "AAPP = SNRM2( M, A(1,p), 1 ) * WORK(p)". */
+
+			if (sva[p] < rootbig && sva[p] > rootsfmin) {
+			    sva[p] = snrm2_(m, &a[p * a_dim1 + 1], &c__1) * 
+				    work[p];
+			} else {
+			    temp1 = 0.f;
+			    aapp = 1.f;
+			    slassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, &
+				    aapp);
+			    sva[p] = temp1 * sqrt(aapp) * work[p];
+			}
+			aapp = sva[p];
+		    } else {
+			aapp = sva[p];
+		    }
+
+		    if (aapp > 0.f) {
+
+			pskipped = 0;
+
+/* Computing MIN */
+			i__5 = igl + kbl - 1;
+			i__4 = f2cmin(i__5,*n);
+			for (q = p + 1; q <= i__4; ++q) {
+
+			    aaqq = sva[q];
+
+			    if (aaqq > 0.f) {
+
+				aapp0 = aapp;
+				if (aaqq >= 1.f) {
+				    rotok = small * aapp <= aaqq;
+				    if (aapp < big / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					slascl_("G", &c__0, &c__0, &aapp, &
+						work[p], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = sdot_(m, &work[*n + 1], &c__1, 
+						&a[q * a_dim1 + 1], &c__1) * 
+						work[q] / aaqq;
+				    }
+				} else {
+				    rotok = aapp <= aaqq / small;
+				    if (aapp > small / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[q * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					slascl_("G", &c__0, &c__0, &aaqq, &
+						work[q], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = sdot_(m, &work[*n + 1], &c__1, 
+						&a[p * a_dim1 + 1], &c__1) * 
+						work[p] / aapp;
+				    }
+				}
+
+/* Computing MAX */
+				r__1 = mxaapq, r__2 = abs(aapq);
+				mxaapq = f2cmax(r__1,r__2);
+
+/*        TO rotate or NOT to rotate, THAT is the question ... */
+
+				if (abs(aapq) > tol) {
+
+/* [RTD]      ROTATED = ROTATED + ONE */
+
+				    if (ir1 == 0) {
+					notrot = 0;
+					pskipped = 0;
+					++iswrot;
+				    }
+
+				    if (rotok) {
+
+					aqoap = aaqq / aapp;
+					apoaq = aapp / aaqq;
+					theta = (r__1 = aqoap - apoaq, abs(
+						r__1)) * -.5f / aapq;
+
+					if (abs(theta) > bigtheta) {
+
+					    t = .5f / theta;
+					    fastr[2] = t * work[p] / work[q];
+					    fastr[3] = -t * work[q] / work[p];
+					    srotm_(m, &a[p * a_dim1 + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1, fastr);
+					    if (rsvec) {
+			  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				  v_dim1 + 1], &c__1, fastr);
+					    }
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(t);
+					    mxsinj = f2cmax(r__1,r__2);
+
+					} else {
+
+
+					    thsign = -r_sign(&c_b18, &aapq);
+					    t = 1.f / (theta + thsign * sqrt(
+						    theta * theta + 1.f));
+					    cs = sqrt(1.f / (t * t + 1.f));
+					    sn = t * cs;
+
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(sn);
+					    mxsinj = f2cmax(r__1,r__2);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+
+					    apoaq = work[p] / work[q];
+					    aqoap = work[q] / work[p];
+					    if (work[p] >= 1.f) {
+			  if (work[q] >= 1.f) {
+			      fastr[2] = t * apoaq;
+			      fastr[3] = -t * aqoap;
+			      work[p] *= cs;
+			      work[q] *= cs;
+			      srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
+				      a_dim1 + 1], &c__1, fastr);
+			      if (rsvec) {
+				  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
+					  q * v_dim1 + 1], &c__1, fastr);
+			      }
+			  } else {
+			      r__1 = -t * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      r__1 = cs * sn * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      work[p] *= cs;
+			      work[q] /= cs;
+			      if (rsvec) {
+				  r__1 = -t * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+			      }
+			  }
+					    } else {
+			  if (work[q] >= 1.f) {
+			      r__1 = t * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      r__1 = -cs * sn * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      work[p] /= cs;
+			      work[q] *= cs;
+			      if (rsvec) {
+				  r__1 = t * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+			      }
+			  } else {
+			      if (work[p] >= work[q]) {
+				  r__1 = -t * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  work[p] *= cs;
+				  work[q] /= cs;
+				  if (rsvec) {
+				      r__1 = -t * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				      r__1 = cs * sn * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				  }
+			      } else {
+				  r__1 = t * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  work[p] /= cs;
+				  work[q] *= cs;
+				  if (rsvec) {
+				      r__1 = t * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				      r__1 = -cs * sn * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				  }
+			      }
+			  }
+					    }
+					}
+
+				    } else {
+					scopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					slascl_("G", &c__0, &c__0, &aapp, &
+						c_b18, m, &c__1, &work[*n + 1]
+						, lda, &ierr);
+					slascl_("G", &c__0, &c__0, &aaqq, &
+						c_b18, m, &c__1, &a[q * 
+						a_dim1 + 1], lda, &ierr);
+					temp1 = -aapq * work[p] / work[q];
+					saxpy_(m, &temp1, &work[*n + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1);
+					slascl_("G", &c__0, &c__0, &c_b18, &
+						aaqq, m, &c__1, &a[q * a_dim1 
+						+ 1], lda, &ierr);
+/* Computing MAX */
+					r__1 = 0.f, r__2 = 1.f - aapq * aapq;
+					sva[q] = aaqq * sqrt((f2cmax(r__1,r__2)))
+						;
+					mxsinj = f2cmax(mxsinj,sfmin);
+				    }
+/*           END IF ROTOK THEN ... ELSE */
+
+/*           In the case of cancellation in updating SVA(q), SVA(p) */
+/*           recompute SVA(q), SVA(p). */
+
+/* Computing 2nd power */
+				    r__1 = sva[q] / aaqq;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aaqq < rootbig && aaqq > 
+						rootsfmin) {
+					    sva[q] = snrm2_(m, &a[q * a_dim1 
+						    + 1], &c__1) * work[q];
+					} else {
+					    t = 0.f;
+					    aaqq = 1.f;
+					    slassq_(m, &a[q * a_dim1 + 1], &
+						    c__1, &t, &aaqq);
+					    sva[q] = t * sqrt(aaqq) * work[q];
+					}
+				    }
+				    if (aapp / aapp0 <= rooteps) {
+					if (aapp < rootbig && aapp > 
+						rootsfmin) {
+					    aapp = snrm2_(m, &a[p * a_dim1 + 
+						    1], &c__1) * work[p];
+					} else {
+					    t = 0.f;
+					    aapp = 1.f;
+					    slassq_(m, &a[p * a_dim1 + 1], &
+						    c__1, &t, &aapp);
+					    aapp = t * sqrt(aapp) * work[p];
+					}
+					sva[p] = aapp;
+				    }
+
+				} else {
+/*        A(:,p) and A(:,q) already numerically orthogonal */
+				    if (ir1 == 0) {
+					++notrot;
+				    }
+/* [RTD]      SKIPPED  = SKIPPED  + 1 */
+				    ++pskipped;
+				}
+			    } else {
+/*        A(:,q) is zero column */
+				if (ir1 == 0) {
+				    ++notrot;
+				}
+				++pskipped;
+			    }
+
+			    if (i__ <= swband && pskipped > rowskip) {
+				if (ir1 == 0) {
+				    aapp = -aapp;
+				}
+				notrot = 0;
+				goto L2103;
+			    }
+
+/* L2002: */
+			}
+/*     END q-LOOP */
+
+L2103:
+/*     bailed out of q-loop */
+
+			sva[p] = aapp;
+
+		    } else {
+			sva[p] = aapp;
+			if (ir1 == 0 && aapp == 0.f) {
+/* Computing MIN */
+			    i__4 = igl + kbl - 1;
+			    notrot = notrot + f2cmin(i__4,*n) - p;
+			}
+		    }
+
+/* L2001: */
+		}
+/*     end of the p-loop */
+/*     end of doing the block ( ibr, ibr ) */
+/* L1002: */
+	    }
+/*     end of ir1-loop */
+
+/* ... go to the off diagonal blocks */
+
+	    igl = (ibr - 1) * kbl + 1;
+
+	    i__2 = nbl;
+	    for (jbc = ibr + 1; jbc <= i__2; ++jbc) {
+
+		jgl = (jbc - 1) * kbl + 1;
+
+/*        doing the block at ( ibr, jbc ) */
+
+		ijblsk = 0;
+/* Computing MIN */
+		i__4 = igl + kbl - 1;
+		i__3 = f2cmin(i__4,*n);
+		for (p = igl; p <= i__3; ++p) {
+
+		    aapp = sva[p];
+		    if (aapp > 0.f) {
+
+			pskipped = 0;
+
+/* Computing MIN */
+			i__5 = jgl + kbl - 1;
+			i__4 = f2cmin(i__5,*n);
+			for (q = jgl; q <= i__4; ++q) {
+
+			    aaqq = sva[q];
+			    if (aaqq > 0.f) {
+				aapp0 = aapp;
+
+
+/*        Safe Gram matrix computation */
+
+				if (aaqq >= 1.f) {
+				    if (aapp >= aaqq) {
+					rotok = small * aapp <= aaqq;
+				    } else {
+					rotok = small * aaqq <= aapp;
+				    }
+				    if (aapp < big / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					slascl_("G", &c__0, &c__0, &aapp, &
+						work[p], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = sdot_(m, &work[*n + 1], &c__1, 
+						&a[q * a_dim1 + 1], &c__1) * 
+						work[q] / aaqq;
+				    }
+				} else {
+				    if (aapp >= aaqq) {
+					rotok = aapp <= aaqq / small;
+				    } else {
+					rotok = aaqq <= aapp / small;
+				    }
+				    if (aapp > small / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[q * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					slascl_("G", &c__0, &c__0, &aaqq, &
+						work[q], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = sdot_(m, &work[*n + 1], &c__1, 
+						&a[p * a_dim1 + 1], &c__1) * 
+						work[p] / aapp;
+				    }
+				}
+
+/* Computing MAX */
+				r__1 = mxaapq, r__2 = abs(aapq);
+				mxaapq = f2cmax(r__1,r__2);
+
+/*        TO rotate or NOT to rotate, THAT is the question ... */
+
+				if (abs(aapq) > tol) {
+				    notrot = 0;
+/* [RTD]      ROTATED  = ROTATED + 1 */
+				    pskipped = 0;
+				    ++iswrot;
+
+				    if (rotok) {
+
+					aqoap = aaqq / aapp;
+					apoaq = aapp / aaqq;
+					theta = (r__1 = aqoap - apoaq, abs(
+						r__1)) * -.5f / aapq;
+					if (aaqq > aapp0) {
+					    theta = -theta;
+					}
+
+					if (abs(theta) > bigtheta) {
+					    t = .5f / theta;
+					    fastr[2] = t * work[p] / work[q];
+					    fastr[3] = -t * work[q] / work[p];
+					    srotm_(m, &a[p * a_dim1 + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1, fastr);
+					    if (rsvec) {
+			  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				  v_dim1 + 1], &c__1, fastr);
+					    }
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(t);
+					    mxsinj = f2cmax(r__1,r__2);
+					} else {
+
+
+					    thsign = -r_sign(&c_b18, &aapq);
+					    if (aaqq > aapp0) {
+			  thsign = -thsign;
+					    }
+					    t = 1.f / (theta + thsign * sqrt(
+						    theta * theta + 1.f));
+					    cs = sqrt(1.f / (t * t + 1.f));
+					    sn = t * cs;
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(sn);
+					    mxsinj = f2cmax(r__1,r__2);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+
+					    apoaq = work[p] / work[q];
+					    aqoap = work[q] / work[p];
+					    if (work[p] >= 1.f) {
+
+			  if (work[q] >= 1.f) {
+			      fastr[2] = t * apoaq;
+			      fastr[3] = -t * aqoap;
+			      work[p] *= cs;
+			      work[q] *= cs;
+			      srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
+				      a_dim1 + 1], &c__1, fastr);
+			      if (rsvec) {
+				  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
+					  q * v_dim1 + 1], &c__1, fastr);
+			      }
+			  } else {
+			      r__1 = -t * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      r__1 = cs * sn * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  r__1 = -t * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+			      }
+			      work[p] *= cs;
+			      work[q] /= cs;
+			  }
+					    } else {
+			  if (work[q] >= 1.f) {
+			      r__1 = t * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      r__1 = -cs * sn * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  r__1 = t * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+			      }
+			      work[p] /= cs;
+			      work[q] *= cs;
+			  } else {
+			      if (work[p] >= work[q]) {
+				  r__1 = -t * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  work[p] *= cs;
+				  work[q] /= cs;
+				  if (rsvec) {
+				      r__1 = -t * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				      r__1 = cs * sn * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				  }
+			      } else {
+				  r__1 = t * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  work[p] /= cs;
+				  work[q] *= cs;
+				  if (rsvec) {
+				      r__1 = t * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				      r__1 = -cs * sn * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				  }
+			      }
+			  }
+					    }
+					}
+
+				    } else {
+					if (aapp > aaqq) {
+					    scopy_(m, &a[p * a_dim1 + 1], &
+						    c__1, &work[*n + 1], &
+						    c__1);
+					    slascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b18, m, &c__1, &work[*
+						    n + 1], lda, &ierr);
+					    slascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b18, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * work[p] / work[q];
+					    saxpy_(m, &temp1, &work[*n + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1);
+					    slascl_("G", &c__0, &c__0, &c_b18,
+						     &aaqq, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - aapq * 
+						    aapq;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+					    mxsinj = f2cmax(mxsinj,sfmin);
+					} else {
+					    scopy_(m, &a[q * a_dim1 + 1], &
+						    c__1, &work[*n + 1], &
+						    c__1);
+					    slascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b18, m, &c__1, &work[*
+						    n + 1], lda, &ierr);
+					    slascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b18, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * work[q] / work[p];
+					    saxpy_(m, &temp1, &work[*n + 1], &
+						    c__1, &a[p * a_dim1 + 1], 
+						    &c__1);
+					    slascl_("G", &c__0, &c__0, &c_b18,
+						     &aapp, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - aapq * 
+						    aapq;
+					    sva[p] = aapp * sqrt((f2cmax(r__1,
+						    r__2)));
+					    mxsinj = f2cmax(mxsinj,sfmin);
+					}
+				    }
+/*           END IF ROTOK THEN ... ELSE */
+
+/*           In the case of cancellation in updating SVA(q) */
+/* Computing 2nd power */
+				    r__1 = sva[q] / aaqq;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aaqq < rootbig && aaqq > 
+						rootsfmin) {
+					    sva[q] = snrm2_(m, &a[q * a_dim1 
+						    + 1], &c__1) * work[q];
+					} else {
+					    t = 0.f;
+					    aaqq = 1.f;
+					    slassq_(m, &a[q * a_dim1 + 1], &
+						    c__1, &t, &aaqq);
+					    sva[q] = t * sqrt(aaqq) * work[q];
+					}
+				    }
+/* Computing 2nd power */
+				    r__1 = aapp / aapp0;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aapp < rootbig && aapp > 
+						rootsfmin) {
+					    aapp = snrm2_(m, &a[p * a_dim1 + 
+						    1], &c__1) * work[p];
+					} else {
+					    t = 0.f;
+					    aapp = 1.f;
+					    slassq_(m, &a[p * a_dim1 + 1], &
+						    c__1, &t, &aapp);
+					    aapp = t * sqrt(aapp) * work[p];
+					}
+					sva[p] = aapp;
+				    }
+/*              end of OK rotation */
+				} else {
+				    ++notrot;
+/* [RTD]      SKIPPED  = SKIPPED  + 1 */
+				    ++pskipped;
+				    ++ijblsk;
+				}
+			    } else {
+				++notrot;
+				++pskipped;
+				++ijblsk;
+			    }
+
+			    if (i__ <= swband && ijblsk >= blskip) {
+				sva[p] = aapp;
+				notrot = 0;
+				goto L2011;
+			    }
+			    if (i__ <= swband && pskipped > rowskip) {
+				aapp = -aapp;
+				notrot = 0;
+				goto L2203;
+			    }
+
+/* L2200: */
+			}
+/*        end of the q-loop */
+L2203:
+
+			sva[p] = aapp;
+
+		    } else {
+
+			if (aapp == 0.f) {
+/* Computing MIN */
+			    i__4 = jgl + kbl - 1;
+			    notrot = notrot + f2cmin(i__4,*n) - jgl + 1;
+			}
+			if (aapp < 0.f) {
+			    notrot = 0;
+			}
+
+		    }
+
+/* L2100: */
+		}
+/*     end of the p-loop */
+/* L2010: */
+	    }
+/*     end of the jbc-loop */
+L2011:
+/* 2011 bailed out of the jbc-loop */
+/* Computing MIN */
+	    i__3 = igl + kbl - 1;
+	    i__2 = f2cmin(i__3,*n);
+	    for (p = igl; p <= i__2; ++p) {
+		sva[p] = (r__1 = sva[p], abs(r__1));
+/* L2012: */
+	    }
+/* ** */
+/* L2000: */
+	}
+/* 2000 :: end of the ibr-loop */
+
+	if (sva[*n] < rootbig && sva[*n] > rootsfmin) {
+	    sva[*n] = snrm2_(m, &a[*n * a_dim1 + 1], &c__1) * work[*n];
+	} else {
+	    t = 0.f;
+	    aapp = 1.f;
+	    slassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp);
+	    sva[*n] = t * sqrt(aapp) * work[*n];
+	}
+
+/*     Additional steering devices */
+
+	if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) {
+	    swband = i__;
+	}
+
+	if (i__ > swband + 1 && mxaapq < sqrt((real) (*n)) * tol && (real) (*
+		n) * mxaapq * mxsinj < tol) {
+	    goto L1994;
+	}
+
+	if (notrot >= emptsw) {
+	    goto L1994;
+	}
+
+/* L1993: */
+    }
+/*     end i=1:NSWEEP loop */
+
+/* #:( Reaching this point means that the procedure has not converged. */
+    *info = 29;
+    goto L1995;
+
+L1994:
+/* #:) Reaching this point means numerical convergence after the i-th */
+/*     sweep. */
+
+    *info = 0;
+/* #:) INFO = 0 confirms successful iterations. */
+L1995:
+
+/*     Sort the singular values and find how many are above */
+/*     the underflow threshold. */
+
+    n2 = 0;
+    n4 = 0;
+    i__1 = *n - 1;
+    for (p = 1; p <= i__1; ++p) {
+	i__2 = *n - p + 1;
+	q = isamax_(&i__2, &sva[p], &c__1) + p - 1;
+	if (p != q) {
+	    temp1 = sva[p];
+	    sva[p] = sva[q];
+	    sva[q] = temp1;
+	    temp1 = work[p];
+	    work[p] = work[q];
+	    work[q] = temp1;
+	    sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1);
+	    if (rsvec) {
+		sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &
+			c__1);
+	    }
+	}
+	if (sva[p] != 0.f) {
+	    ++n4;
+	    if (sva[p] * skl > sfmin) {
+		++n2;
+	    }
+	}
+/* L5991: */
+    }
+    if (sva[*n] != 0.f) {
+	++n4;
+	if (sva[*n] * skl > sfmin) {
+	    ++n2;
+	}
+    }
+
+/*     Normalize the left singular vectors. */
+
+    if (lsvec || uctol) {
+	i__1 = n2;
+	for (p = 1; p <= i__1; ++p) {
+	    r__1 = work[p] / sva[p];
+	    sscal_(m, &r__1, &a[p * a_dim1 + 1], &c__1);
+/* L1998: */
+	}
+    }
+
+/*     Scale the product of Jacobi rotations (assemble the fast rotations). */
+
+    if (rsvec) {
+	if (applv) {
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		sscal_(&mvl, &work[p], &v[p * v_dim1 + 1], &c__1);
+/* L2398: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		temp1 = 1.f / snrm2_(&mvl, &v[p * v_dim1 + 1], &c__1);
+		sscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1);
+/* L2399: */
+	    }
+	}
+    }
+
+/*     Undo scaling, if necessary (and possible). */
+    if (skl > 1.f && sva[1] < big / skl || skl < 1.f && sva[f2cmax(n2,1)] > 
+	    sfmin / skl) {
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    sva[p] = skl * sva[p];
+/* L2400: */
+	}
+	skl = 1.f;
+    }
+
+    work[1] = skl;
+/*     The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE */
+/*     then some of the singular values may overflow or underflow and */
+/*     the spectrum is given in this factored representation. */
+
+    work[2] = (real) n4;
+/*     N4 is the number of computed nonzero singular values of A. */
+
+    work[3] = (real) n2;
+/*     N2 is the number of singular values of A greater than SFMIN. */
+/*     If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers */
+/*     that may carry some information. */
+
+    work[4] = (real) i__;
+/*     i is the index of the last sweep before declaring convergence. */
+
+    work[5] = mxaapq;
+/*     MXAAPQ is the largest absolute value of scaled pivots in the */
+/*     last sweep */
+
+    work[6] = mxsinj;
+/*     MXSINJ is the largest absolute value of the sines of Jacobi angles */
+/*     in the last sweep */
+
+    return 0;
+} /* sgesvj_ */
+
diff --git a/lapack-netlib/SRC/sgesvx.c b/lapack-netlib/SRC/sgesvx.c
new file mode 100644
index 000000000..06f12d0b4
--- /dev/null
+++ b/lapack-netlib/SRC/sgesvx.c
@@ -0,0 +1,1063 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> SGESVX computes the solution to system of linear equations A * X = B for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESVX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */
+/*                          EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, */
+/*                          WORK, IWORK, INFO ) */
+
+/*       CHARACTER          EQUED, FACT, TRANS */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
+/*       REAL               RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   BERR( * ), C( * ), FERR( * ), R( * ), */
+/*      $                   WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGESVX uses the LU factorization to compute the solution to a real */
+/* > system of linear equations */
+/* >    A * X = B, */
+/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */
+/* > */
+/* > Error bounds on the solution and a condition estimate are also */
+/* > provided. */
+/* > \endverbatim */
+
+/* > \par Description: */
+/*  ================= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The following steps are performed: */
+/* > */
+/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */
+/* >    the system: */
+/* >       TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B */
+/* >       TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */
+/* >       TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */
+/* >    Whether or not the system will be equilibrated depends on the */
+/* >    scaling of the matrix A, but if equilibration is used, A is */
+/* >    overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */
+/* >    or diag(C)*B (if TRANS = 'T' or 'C'). */
+/* > */
+/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */
+/* >    matrix A (after equilibration if FACT = 'E') as */
+/* >       A = P * L * U, */
+/* >    where P is a permutation matrix, L is a unit lower triangular */
+/* >    matrix, and U is upper triangular. */
+/* > */
+/* > 3. If some U(i,i)=0, so that U is exactly singular, then the routine */
+/* >    returns with INFO = i. Otherwise, the factored form of A is used */
+/* >    to estimate the condition number of the matrix A.  If the */
+/* >    reciprocal of the condition number is less than machine precision, */
+/* >    INFO = N+1 is returned as a warning, but the routine still goes on */
+/* >    to solve for X and compute error bounds as described below. */
+/* > */
+/* > 4. The system of equations is solved for X using the factored form */
+/* >    of A. */
+/* > */
+/* > 5. Iterative refinement is applied to improve the computed solution */
+/* >    matrix and calculate error bounds and backward error estimates */
+/* >    for it. */
+/* > */
+/* > 6. If equilibration was used, the matrix X is premultiplied by */
+/* >    diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */
+/* >    that it solves the original system before equilibration. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] FACT */
+/* > \verbatim */
+/* >          FACT is CHARACTER*1 */
+/* >          Specifies whether or not the factored form of the matrix A is */
+/* >          supplied on entry, and if not, whether the matrix A should be */
+/* >          equilibrated before it is factored. */
+/* >          = 'F':  On entry, AF and IPIV contain the factored form of A. */
+/* >                  If EQUED is not 'N', the matrix A has been */
+/* >                  equilibrated with scaling factors given by R and C. */
+/* >                  A, AF, and IPIV are not modified. */
+/* >          = 'N':  The matrix A will be copied to AF and factored. */
+/* >          = 'E':  The matrix A will be equilibrated if necessary, then */
+/* >                  copied to AF and factored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of linear equations, i.e., the order of the */
+/* >          matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is */
+/* >          not 'N', then A must have been equilibrated by the scaling */
+/* >          factors in R and/or C.  A is not modified if FACT = 'F' or */
+/* >          'N', or if FACT = 'E' and EQUED = 'N' on exit. */
+/* > */
+/* >          On exit, if EQUED .ne. 'N', A is scaled as follows: */
+/* >          EQUED = 'R':  A := diag(R) * A */
+/* >          EQUED = 'C':  A := A * diag(C) */
+/* >          EQUED = 'B':  A := diag(R) * A * diag(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AF */
+/* > \verbatim */
+/* >          AF is REAL array, dimension (LDAF,N) */
+/* >          If FACT = 'F', then AF is an input argument and on entry */
+/* >          contains the factors L and U from the factorization */
+/* >          A = P*L*U as computed by SGETRF.  If EQUED .ne. 'N', then */
+/* >          AF is the factored form of the equilibrated matrix A. */
+/* > */
+/* >          If FACT = 'N', then AF is an output argument and on exit */
+/* >          returns the factors L and U from the factorization A = P*L*U */
+/* >          of the original matrix A. */
+/* > */
+/* >          If FACT = 'E', then AF is an output argument and on exit */
+/* >          returns the factors L and U from the factorization A = P*L*U */
+/* >          of the equilibrated matrix A (see the description of A for */
+/* >          the form of the equilibrated matrix). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          If FACT = 'F', then IPIV is an input argument and on entry */
+/* >          contains the pivot indices from the factorization A = P*L*U */
+/* >          as computed by SGETRF; row i of the matrix was interchanged */
+/* >          with row IPIV(i). */
+/* > */
+/* >          If FACT = 'N', then IPIV is an output argument and on exit */
+/* >          contains the pivot indices from the factorization A = P*L*U */
+/* >          of the original matrix A. */
+/* > */
+/* >          If FACT = 'E', then IPIV is an output argument and on exit */
+/* >          contains the pivot indices from the factorization A = P*L*U */
+/* >          of the equilibrated matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >          Specifies the form of equilibration that was done. */
+/* >          = 'N':  No equilibration (always true if FACT = 'N'). */
+/* >          = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >                  diag(R). */
+/* >          = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >                  by diag(C). */
+/* >          = 'B':  Both row and column equilibration, i.e., A has been */
+/* >                  replaced by diag(R) * A * diag(C). */
+/* >          EQUED is an input argument if FACT = 'F'; otherwise, it is an */
+/* >          output argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] R */
+/* > \verbatim */
+/* >          R is REAL array, dimension (N) */
+/* >          The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >          is not accessed.  R is an input argument if FACT = 'F'; */
+/* >          otherwise, R is an output argument.  If FACT = 'F' and */
+/* >          EQUED = 'R' or 'B', each element of R must be positive. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >          The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >          is not accessed.  C is an input argument if FACT = 'F'; */
+/* >          otherwise, C is an output argument.  If FACT = 'F' and */
+/* >          EQUED = 'C' or 'B', each element of C must be positive. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the N-by-NRHS right hand side matrix B. */
+/* >          On exit, */
+/* >          if EQUED = 'N', B is not modified; */
+/* >          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */
+/* >          diag(R)*B; */
+/* >          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */
+/* >          overwritten by diag(C)*B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (LDX,NRHS) */
+/* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */
+/* >          to the original system of equations.  Note that A and B are */
+/* >          modified on exit if EQUED .ne. 'N', and the solution to the */
+/* >          equilibrated system is inv(diag(C))*X if TRANS = 'N' and */
+/* >          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */
+/* >          and EQUED = 'R' or 'B'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >          The estimate of the reciprocal condition number of the matrix */
+/* >          A after equilibration (if done).  If RCOND is less than the */
+/* >          machine precision (in particular, if RCOND = 0), the matrix */
+/* >          is singular to working precision.  This condition is */
+/* >          indicated by a return code of INFO > 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is REAL array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is REAL array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (4*N) */
+/* >          On exit, WORK(1) contains the reciprocal pivot growth */
+/* >          factor norm(A)/norm(U). The "f2cmax absolute element" norm is */
+/* >          used. If WORK(1) is much less than 1, then the stability */
+/* >          of the LU factorization of the (equilibrated) matrix A */
+/* >          could be poor. This also means that the solution X, condition */
+/* >          estimator RCOND, and forward error bound FERR could be */
+/* >          unreliable. If factorization fails with 0<INFO<=N, then */
+/* >          WORK(1) contains the reciprocal pivot growth factor for the */
+/* >          leading INFO columns of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, and i is */
+/* >                <= N:  U(i,i) is exactly zero.  The factorization has */
+/* >                       been completed, but the factor U is exactly */
+/* >                       singular, so the solution and error bounds */
+/* >                       could not be computed. RCOND = 0 is returned. */
+/* >                = N+1: U is nonsingular, but RCOND is less than machine */
+/* >                       precision, meaning that the matrix is singular */
+/* >                       to working precision.  Nevertheless, the */
+/* >                       solution and error bounds are computed because */
+/* >                       there are a number of situations where the */
+/* >                       computed solution can be more accurate than the */
+/* >                       value of RCOND would suggest. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup realGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesvx_(char *fact, char *trans, integer *n, integer *
+	nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, 
+	char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, 
+	integer *ldx, real *rcond, real *ferr, real *berr, real *work, 
+	integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    real amax;
+    char norm[1];
+    integer i__, j;
+    extern logical lsame_(char *, char *);
+    real rcmin, rcmax, anorm;
+    logical equil;
+    real colcnd;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    logical nofact;
+    extern /* Subroutine */ int slaqge_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, char *), 
+	    xerbla_(char *, integer *, ftnlen), sgecon_(char *, integer *, 
+	    real *, integer *, real *, real *, real *, integer *, integer *);
+    real bignum;
+    integer infequ;
+    logical colequ;
+    extern /* Subroutine */ int sgeequ_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, integer *), sgerfs_(
+	    char *, integer *, integer *, real *, integer *, real *, integer *
+	    , integer *, real *, integer *, real *, integer *, real *, real *,
+	     real *, integer *, integer *), sgetrf_(integer *, 
+	    integer *, real *, integer *, integer *, integer *);
+    real rowcnd;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *);
+    logical notran;
+    extern real slantr_(char *, char *, char *, integer *, integer *, real *, 
+	    integer *, real *);
+    extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
+	    integer *, integer *, real *, integer *, integer *);
+    real smlnum;
+    logical rowequ;
+    real rpvgrw;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    nofact = lsame_(fact, "N");
+    equil = lsame_(fact, "E");
+    notran = lsame_(trans, "N");
+    if (nofact || equil) {
+	*(unsigned char *)equed = 'N';
+	rowequ = FALSE_;
+	colequ = FALSE_;
+    } else {
+	rowequ = lsame_(equed, "R") || lsame_(equed, 
+		"B");
+	colequ = lsame_(equed, "C") || lsame_(equed, 
+		"B");
+	smlnum = slamch_("Safe minimum");
+	bignum = 1.f / smlnum;
+    }
+
+/*     Test the input parameters. */
+
+    if (! nofact && ! equil && ! lsame_(fact, "F")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && ! 
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -8;
+    } else if (lsame_(fact, "F") && ! (rowequ || colequ 
+	    || lsame_(equed, "N"))) {
+	*info = -10;
+    } else {
+	if (rowequ) {
+	    rcmin = bignum;
+	    rcmax = 0.f;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		r__1 = rcmin, r__2 = r__[j];
+		rcmin = f2cmin(r__1,r__2);
+/* Computing MAX */
+		r__1 = rcmax, r__2 = r__[j];
+		rcmax = f2cmax(r__1,r__2);
+/* L10: */
+	    }
+	    if (rcmin <= 0.f) {
+		*info = -11;
+	    } else if (*n > 0) {
+		rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		rowcnd = 1.f;
+	    }
+	}
+	if (colequ && *info == 0) {
+	    rcmin = bignum;
+	    rcmax = 0.f;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		r__1 = rcmin, r__2 = c__[j];
+		rcmin = f2cmin(r__1,r__2);
+/* Computing MAX */
+		r__1 = rcmax, r__2 = c__[j];
+		rcmax = f2cmax(r__1,r__2);
+/* L20: */
+	    }
+	    if (rcmin <= 0.f) {
+		*info = -12;
+	    } else if (*n > 0) {
+		colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		colcnd = 1.f;
+	    }
+	}
+	if (*info == 0) {
+	    if (*ldb < f2cmax(1,*n)) {
+		*info = -14;
+	    } else if (*ldx < f2cmax(1,*n)) {
+		*info = -16;
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESVX", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    if (equil) {
+
+/*        Compute row and column scalings to equilibrate the matrix A. */
+
+	sgeequ_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, &
+		amax, &infequ);
+	if (infequ == 0) {
+
+/*           Equilibrate the matrix. */
+
+	    slaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &
+		    colcnd, &amax, equed);
+	    rowequ = lsame_(equed, "R") || lsame_(equed,
+		     "B");
+	    colequ = lsame_(equed, "C") || lsame_(equed,
+		     "B");
+	}
+    }
+
+/*     Scale the right hand side. */
+
+    if (notran) {
+	if (rowequ) {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1];
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+    } else if (colequ) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1];
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+    if (nofact || equil) {
+
+/*        Compute the LU factorization of A. */
+
+	slacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf);
+	sgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info);
+
+/*        Return if INFO is non-zero. */
+
+	if (*info > 0) {
+
+/*           Compute the reciprocal pivot growth factor of the */
+/*           leading rank-deficient INFO columns of A. */
+
+	    rpvgrw = slantr_("M", "U", "N", info, info, &af[af_offset], ldaf, 
+		    &work[1]);
+	    if (rpvgrw == 0.f) {
+		rpvgrw = 1.f;
+	    } else {
+		rpvgrw = slange_("M", n, info, &a[a_offset], lda, &work[1]) / rpvgrw;
+	    }
+	    work[1] = rpvgrw;
+	    *rcond = 0.f;
+	    return 0;
+	}
+    }
+
+/*     Compute the norm of the matrix A and the */
+/*     reciprocal pivot growth factor RPVGRW. */
+
+    if (notran) {
+	*(unsigned char *)norm = '1';
+    } else {
+	*(unsigned char *)norm = 'I';
+    }
+    anorm = slange_(norm, n, n, &a[a_offset], lda, &work[1]);
+    rpvgrw = slantr_("M", "U", "N", n, n, &af[af_offset], ldaf, &work[1]);
+    if (rpvgrw == 0.f) {
+	rpvgrw = 1.f;
+    } else {
+	rpvgrw = slange_("M", n, n, &a[a_offset], lda, &work[1]) / 
+		rpvgrw;
+    }
+
+/*     Compute the reciprocal of the condition number of A. */
+
+    sgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1],
+	     info);
+
+/*     Compute the solution matrix X. */
+
+    slacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
+    sgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx,
+	     info);
+
+/*     Use iterative refinement to improve the computed solution and */
+/*     compute error bounds and backward error estimates for it. */
+
+    sgerfs_(trans, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1],
+	     &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[
+	    1], &iwork[1], info);
+
+/*     Transform the solution matrix X to a solution of the original */
+/*     system. */
+
+    if (notran) {
+	if (colequ) {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1];
+/* L70: */
+		}
+/* L80: */
+	    }
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		ferr[j] /= colcnd;
+/* L90: */
+	    }
+	}
+    } else if (rowequ) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1];
+/* L100: */
+	    }
+/* L110: */
+	}
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    ferr[j] /= rowcnd;
+/* L120: */
+	}
+    }
+
+/*     Set INFO = N+1 if the matrix is singular to working precision. */
+
+    if (*rcond < slamch_("Epsilon")) {
+	*info = *n + 1;
+    }
+
+    work[1] = rpvgrw;
+    return 0;
+
+/*     End of SGESVX */
+
+} /* sgesvx_ */
+
diff --git a/lapack-netlib/SRC/sgesvxx.c b/lapack-netlib/SRC/sgesvxx.c
new file mode 100644
index 000000000..94de9b371
--- /dev/null
+++ b/lapack-netlib/SRC/sgesvxx.c
@@ -0,0 +1,1210 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> SGESVXX computes the solution to system of linear equations A * X = B for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGESVXX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvxx
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvxx
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvxx
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */
+/*                           EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, */
+/*                           BERR, N_ERR_BNDS, ERR_BNDS_NORM, */
+/*                           ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, */
+/*                           INFO ) */
+
+/*       CHARACTER          EQUED, FACT, TRANS */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */
+/*      $                   N_ERR_BNDS */
+/*       REAL               RCOND, RPVGRW */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   X( LDX , * ),WORK( * ) */
+/*       REAL               R( * ), C( * ), PARAMS( * ), BERR( * ), */
+/*      $                   ERR_BNDS_NORM( NRHS, * ), */
+/*      $                   ERR_BNDS_COMP( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    SGESVXX uses the LU factorization to compute the solution to a */
+/* >    real system of linear equations  A * X = B,  where A is an */
+/* >    N-by-N matrix and X and B are N-by-NRHS matrices. */
+/* > */
+/* >    If requested, both normwise and maximum componentwise error bounds */
+/* >    are returned. SGESVXX will return a solution with a tiny */
+/* >    guaranteed error (O(eps) where eps is the working machine */
+/* >    precision) unless the matrix is very ill-conditioned, in which */
+/* >    case a warning is returned. Relevant condition numbers also are */
+/* >    calculated and returned. */
+/* > */
+/* >    SGESVXX accepts user-provided factorizations and equilibration */
+/* >    factors; see the definitions of the FACT and EQUED options. */
+/* >    Solving with refinement and using a factorization from a previous */
+/* >    SGESVXX call will also produce a solution with either O(eps) */
+/* >    errors or warnings, but we cannot make that claim for general */
+/* >    user-provided factorizations and equilibration factors if they */
+/* >    differ from what SGESVXX would itself produce. */
+/* > \endverbatim */
+
+/* > \par Description: */
+/*  ================= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    The following steps are performed: */
+/* > */
+/* >    1. If FACT = 'E', real scaling factors are computed to equilibrate */
+/* >    the system: */
+/* > */
+/* >      TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B */
+/* >      TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */
+/* >      TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */
+/* > */
+/* >    Whether or not the system will be equilibrated depends on the */
+/* >    scaling of the matrix A, but if equilibration is used, A is */
+/* >    overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */
+/* >    or diag(C)*B (if TRANS = 'T' or 'C'). */
+/* > */
+/* >    2. If FACT = 'N' or 'E', the LU decomposition is used to factor */
+/* >    the matrix A (after equilibration if FACT = 'E') as */
+/* > */
+/* >      A = P * L * U, */
+/* > */
+/* >    where P is a permutation matrix, L is a unit lower triangular */
+/* >    matrix, and U is upper triangular. */
+/* > */
+/* >    3. If some U(i,i)=0, so that U is exactly singular, then the */
+/* >    routine returns with INFO = i. Otherwise, the factored form of A */
+/* >    is used to estimate the condition number of the matrix A (see */
+/* >    argument RCOND). If the reciprocal of the condition number is less */
+/* >    than machine precision, the routine still goes on to solve for X */
+/* >    and compute error bounds as described below. */
+/* > */
+/* >    4. The system of equations is solved for X using the factored form */
+/* >    of A. */
+/* > */
+/* >    5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */
+/* >    the routine will use iterative refinement to try to get a small */
+/* >    error and error bounds.  Refinement calculates the residual to at */
+/* >    least twice the working precision. */
+/* > */
+/* >    6. If equilibration was used, the matrix X is premultiplied by */
+/* >    diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */
+/* >    that it solves the original system before equilibration. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \verbatim */
+/* >     Some optional parameters are bundled in the PARAMS array.  These */
+/* >     settings determine how refinement is performed, but often the */
+/* >     defaults are acceptable.  If the defaults are acceptable, users */
+/* >     can pass NPARAMS = 0 which prevents the source code from accessing */
+/* >     the PARAMS argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] FACT */
+/* > \verbatim */
+/* >          FACT is CHARACTER*1 */
+/* >     Specifies whether or not the factored form of the matrix A is */
+/* >     supplied on entry, and if not, whether the matrix A should be */
+/* >     equilibrated before it is factored. */
+/* >       = 'F':  On entry, AF and IPIV contain the factored form of A. */
+/* >               If EQUED is not 'N', the matrix A has been */
+/* >               equilibrated with scaling factors given by R and C. */
+/* >               A, AF, and IPIV are not modified. */
+/* >       = 'N':  The matrix A will be copied to AF and factored. */
+/* >       = 'E':  The matrix A will be equilibrated if necessary, then */
+/* >               copied to AF and factored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate Transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right hand sides, i.e., the number of columns */
+/* >     of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >     On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is */
+/* >     not 'N', then A must have been equilibrated by the scaling */
+/* >     factors in R and/or C.  A is not modified if FACT = 'F' or */
+/* >     'N', or if FACT = 'E' and EQUED = 'N' on exit. */
+/* > */
+/* >     On exit, if EQUED .ne. 'N', A is scaled as follows: */
+/* >     EQUED = 'R':  A := diag(R) * A */
+/* >     EQUED = 'C':  A := A * diag(C) */
+/* >     EQUED = 'B':  A := diag(R) * A * diag(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >     The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AF */
+/* > \verbatim */
+/* >          AF is REAL array, dimension (LDAF,N) */
+/* >     If FACT = 'F', then AF is an input argument and on entry */
+/* >     contains the factors L and U from the factorization */
+/* >     A = P*L*U as computed by SGETRF.  If EQUED .ne. 'N', then */
+/* >     AF is the factored form of the equilibrated matrix A. */
+/* > */
+/* >     If FACT = 'N', then AF is an output argument and on exit */
+/* >     returns the factors L and U from the factorization A = P*L*U */
+/* >     of the original matrix A. */
+/* > */
+/* >     If FACT = 'E', then AF is an output argument and on exit */
+/* >     returns the factors L and U from the factorization A = P*L*U */
+/* >     of the equilibrated matrix A (see the description of A for */
+/* >     the form of the equilibrated matrix). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >     The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     If FACT = 'F', then IPIV is an input argument and on entry */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     as computed by SGETRF; row i of the matrix was interchanged */
+/* >     with row IPIV(i). */
+/* > */
+/* >     If FACT = 'N', then IPIV is an output argument and on exit */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     of the original matrix A. */
+/* > */
+/* >     If FACT = 'E', then IPIV is an output argument and on exit */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     of the equilibrated matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >     Specifies the form of equilibration that was done. */
+/* >       = 'N':  No equilibration (always true if FACT = 'N'). */
+/* >       = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >               diag(R). */
+/* >       = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >               by diag(C). */
+/* >       = 'B':  Both row and column equilibration, i.e., A has been */
+/* >               replaced by diag(R) * A * diag(C). */
+/* >     EQUED is an input argument if FACT = 'F'; otherwise, it is an */
+/* >     output argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] R */
+/* > \verbatim */
+/* >          R is REAL array, dimension (N) */
+/* >     The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >     multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >     is not accessed.  R is an input argument if FACT = 'F'; */
+/* >     otherwise, R is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'R' or 'B', each element of R must be positive. */
+/* >     If R is output, each element of R is a power of the radix. */
+/* >     If R is input, each element of R should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >     The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >     multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >     is not accessed.  C is an input argument if FACT = 'F'; */
+/* >     otherwise, C is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'C' or 'B', each element of C must be positive. */
+/* >     If C is output, each element of C is a power of the radix. */
+/* >     If C is input, each element of C should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >     On entry, the N-by-NRHS right hand side matrix B. */
+/* >     On exit, */
+/* >     if EQUED = 'N', B is not modified; */
+/* >     if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */
+/* >        diag(R)*B; */
+/* >     if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */
+/* >        overwritten by diag(C)*B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (LDX,NRHS) */
+/* >     If INFO = 0, the N-by-NRHS solution matrix X to the original */
+/* >     system of equations.  Note that A and B are modified on exit */
+/* >     if EQUED .ne. 'N', and the solution to the equilibrated system is */
+/* >     inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */
+/* >     inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >     The leading dimension of the array X.  LDX >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RPVGRW */
+/* > \verbatim */
+/* >          RPVGRW is REAL */
+/* >     Reciprocal pivot growth.  On exit, this contains the reciprocal */
+/* >     pivot growth factor norm(A)/norm(U). The "f2cmax absolute element" */
+/* >     norm is used.  If this is much less than 1, then the stability of */
+/* >     the LU factorization of the (equilibrated) matrix A could be poor. */
+/* >     This also means that the solution X, estimated condition numbers, */
+/* >     and error bounds could be unreliable. If factorization fails with */
+/* >     0<INFO<=N, then this contains the reciprocal pivot growth factor */
+/* >     for the leading INFO columns of A.  In SGESVX, this quantity is */
+/* >     returned in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is REAL array, dimension (NRHS) */
+/* >     Componentwise relative backward error.  This is the */
+/* >     componentwise relative backward error of each solution vector X(j) */
+/* >     (i.e., the smallest relative change in any element of A or B that */
+/* >     makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_ERR_BNDS */
+/* > \verbatim */
+/* >          N_ERR_BNDS is INTEGER */
+/* >     Number of error bounds to return for each right hand side */
+/* >     and each type (normwise or componentwise).  See ERR_BNDS_NORM and */
+/* >     ERR_BNDS_COMP below. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_NORM */
+/* > \verbatim */
+/* >          ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_COMP */
+/* > \verbatim */
+/* >          ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NPARAMS */
+/* > \verbatim */
+/* >          NPARAMS is INTEGER */
+/* >     Specifies the number of parameters set in PARAMS.  If <= 0, the */
+/* >     PARAMS array is never referenced and default values are used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] PARAMS */
+/* > \verbatim */
+/* >          PARAMS is REAL array, dimension NPARAMS */
+/* >     Specifies algorithm parameters.  If an entry is < 0.0, then */
+/* >     that entry will be filled with default value used for that */
+/* >     parameter.  Only positions up to NPARAMS are accessed; defaults */
+/* >     are used for higher-numbered parameters. */
+/* > */
+/* >       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */
+/* >            refinement or not. */
+/* >         Default: 1.0 */
+/* >            = 0.0:  No refinement is performed, and no error bounds are */
+/* >                    computed. */
+/* >            = 1.0:  Use the double-precision refinement algorithm, */
+/* >                    possibly with doubled-single computations if the */
+/* >                    compilation environment does not support DOUBLE */
+/* >                    PRECISION. */
+/* >              (other values are reserved for future use) */
+/* > */
+/* >       PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */
+/* >            computations allowed for refinement. */
+/* >         Default: 10 */
+/* >         Aggressive: Set to 100 to permit convergence using approximate */
+/* >                     factorizations or factorizations other than LU. If */
+/* >                     the factorization uses a technique other than */
+/* >                     Gaussian elimination, the guarantees in */
+/* >                     err_bnds_norm and err_bnds_comp may no longer be */
+/* >                     trustworthy. */
+/* > */
+/* >       PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */
+/* >            will attempt to find a solution with small componentwise */
+/* >            relative error in the double-precision algorithm.  Positive */
+/* >            is true, 0.0 is false. */
+/* >         Default: 1.0 (attempt componentwise convergence) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. The solution to every right-hand side is */
+/* >         guaranteed. */
+/* >       < 0:  If INFO = -i, the i-th argument had an illegal value */
+/* >       > 0 and <= N:  U(INFO,INFO) is exactly zero.  The factorization */
+/* >         has been completed, but the factor U is exactly singular, so */
+/* >         the solution and error bounds could not be computed. RCOND = 0 */
+/* >         is returned. */
+/* >       = N+J: The solution corresponding to the Jth right-hand side is */
+/* >         not guaranteed. The solutions corresponding to other right- */
+/* >         hand sides K with K > J may not be guaranteed as well, but */
+/* >         only the first such right-hand side is reported. If a small */
+/* >         componentwise error is not requested (PARAMS(3) = 0.0) then */
+/* >         the Jth right-hand side is the first with a normwise error */
+/* >         bound that is not guaranteed (the smallest J such */
+/* >         that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */
+/* >         the Jth right-hand side is the first with either a normwise or */
+/* >         componentwise error bound that is not guaranteed (the smallest */
+/* >         J such that either ERR_BNDS_NORM(J,1) = 0.0 or */
+/* >         ERR_BNDS_COMP(J,1) = 0.0). See the definition of */
+/* >         ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */
+/* >         about all of the right-hand sides check ERR_BNDS_NORM or */
+/* >         ERR_BNDS_COMP. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup realGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgesvxx_(char *fact, char *trans, integer *n, integer *
+	nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, 
+	char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, 
+	integer *ldx, real *rcond, real *rpvgrw, real *berr, integer *
+	n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer *
+	nparams, real *params, real *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, 
+	    err_bnds_comp_dim1, err_bnds_comp_offset, i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    real amax;
+    extern real sla_gerpvgrw_(integer *, integer *, real *, integer *, real *
+	    , integer *);
+    integer j;
+    extern logical lsame_(char *, char *);
+    real rcmin, rcmax;
+    logical equil;
+    real colcnd;
+    extern real slamch_(char *);
+    logical nofact;
+    extern /* Subroutine */ int slaqge_(integer *, integer *, real *, integer 
+	    *, real *, real *, real *, real *, real *, char *), 
+	    xerbla_(char *, integer *, ftnlen);
+    real bignum;
+    integer infequ;
+    logical colequ;
+    extern /* Subroutine */ int sgetrf_(integer *, integer *, real *, integer 
+	    *, integer *, integer *);
+    real rowcnd;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *);
+    logical notran;
+    extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
+	    integer *, integer *, real *, integer *, integer *);
+    real smlnum;
+    logical rowequ;
+    extern /* Subroutine */ int slascl2_(integer *, integer *, real *, real *,
+	     integer *), sgeequb_(integer *, integer *, real *, integer *, 
+	    real *, real *, real *, real *, real *, integer *), sgerfsx_(char 
+	    *, char *, integer *, integer *, real *, integer *, real *, 
+	    integer *, integer *, real *, real *, real *, integer *, real *, 
+	    integer *, real *, real *, integer *, real *, real *, integer *, 
+	    real *, real *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ================================================================== */
+
+
+    /* Parameter adjustments */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --berr;
+    --params;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    nofact = lsame_(fact, "N");
+    equil = lsame_(fact, "E");
+    notran = lsame_(trans, "N");
+    smlnum = slamch_("Safe minimum");
+    bignum = 1.f / smlnum;
+    if (nofact || equil) {
+	*(unsigned char *)equed = 'N';
+	rowequ = FALSE_;
+	colequ = FALSE_;
+    } else {
+	rowequ = lsame_(equed, "R") || lsame_(equed, 
+		"B");
+	colequ = lsame_(equed, "C") || lsame_(equed, 
+		"B");
+    }
+
+/*     Default is failure.  If an input parameter is wrong or */
+/*     factorization fails, make everything look horrible.  Only the */
+/*     pivot growth is set here, the rest is initialized in SGERFSX. */
+
+    *rpvgrw = 0.f;
+
+/*     Test the input parameters.  PARAMS is not tested until SGERFSX. */
+
+    if (! nofact && ! equil && ! lsame_(fact, "F")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && ! 
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -8;
+    } else if (lsame_(fact, "F") && ! (rowequ || colequ 
+	    || lsame_(equed, "N"))) {
+	*info = -10;
+    } else {
+	if (rowequ) {
+	    rcmin = bignum;
+	    rcmax = 0.f;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		r__1 = rcmin, r__2 = r__[j];
+		rcmin = f2cmin(r__1,r__2);
+/* Computing MAX */
+		r__1 = rcmax, r__2 = r__[j];
+		rcmax = f2cmax(r__1,r__2);
+/* L10: */
+	    }
+	    if (rcmin <= 0.f) {
+		*info = -11;
+	    } else if (*n > 0) {
+		rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		rowcnd = 1.f;
+	    }
+	}
+	if (colequ && *info == 0) {
+	    rcmin = bignum;
+	    rcmax = 0.f;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		r__1 = rcmin, r__2 = c__[j];
+		rcmin = f2cmin(r__1,r__2);
+/* Computing MAX */
+		r__1 = rcmax, r__2 = c__[j];
+		rcmax = f2cmax(r__1,r__2);
+/* L20: */
+	    }
+	    if (rcmin <= 0.f) {
+		*info = -12;
+	    } else if (*n > 0) {
+		colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		colcnd = 1.f;
+	    }
+	}
+	if (*info == 0) {
+	    if (*ldb < f2cmax(1,*n)) {
+		*info = -14;
+	    } else if (*ldx < f2cmax(1,*n)) {
+		*info = -16;
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESVXX", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    if (equil) {
+
+/*     Compute row and column scalings to equilibrate the matrix A. */
+
+	sgeequb_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, 
+		&amax, &infequ);
+	if (infequ == 0) {
+
+/*     Equilibrate the matrix. */
+
+	    slaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &
+		    colcnd, &amax, equed);
+	    rowequ = lsame_(equed, "R") || lsame_(equed,
+		     "B");
+	    colequ = lsame_(equed, "C") || lsame_(equed,
+		     "B");
+	}
+
+/*     If the scaling factors are not applied, set them to 1.0. */
+
+	if (! rowequ) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		r__[j] = 1.f;
+	    }
+	}
+	if (! colequ) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		c__[j] = 1.f;
+	    }
+	}
+    }
+
+/*     Scale the right-hand side. */
+
+    if (notran) {
+	if (rowequ) {
+	    slascl2_(n, nrhs, &r__[1], &b[b_offset], ldb);
+	}
+    } else {
+	if (colequ) {
+	    slascl2_(n, nrhs, &c__[1], &b[b_offset], ldb);
+	}
+    }
+
+    if (nofact || equil) {
+
+/*        Compute the LU factorization of A. */
+
+	slacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf);
+	sgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info);
+
+/*        Return if INFO is non-zero. */
+
+	if (*info > 0) {
+
+/*           Pivot in column INFO is exactly 0 */
+/*           Compute the reciprocal pivot growth factor of the */
+/*           leading rank-deficient INFO columns of A. */
+
+	    *rpvgrw = sla_gerpvgrw_(n, info, &a[a_offset], lda, &af[
+		    af_offset], ldaf);
+	    return 0;
+	}
+    }
+
+/*     Compute the reciprocal pivot growth factor RPVGRW. */
+
+    *rpvgrw = sla_gerpvgrw_(n, n, &a[a_offset], lda, &af[af_offset], ldaf);
+
+/*     Compute the solution matrix X. */
+
+    slacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
+    sgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx,
+	     info);
+
+/*     Use iterative refinement to improve the computed solution and */
+/*     compute error bounds and backward error estimates for it. */
+
+    sgerfsx_(trans, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &
+	    ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, 
+	    rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[
+	    err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], 
+	    nparams, &params[1], &work[1], &iwork[1], info);
+
+/*     Scale solutions. */
+
+    if (colequ && notran) {
+	slascl2_(n, nrhs, &c__[1], &x[x_offset], ldx);
+    } else if (rowequ && ! notran) {
+	slascl2_(n, nrhs, &r__[1], &x[x_offset], ldx);
+    }
+
+    return 0;
+
+/*     End of SGESVXX */
+} /* sgesvxx_ */
+
diff --git a/lapack-netlib/SRC/sgetc2.c b/lapack-netlib/SRC/sgetc2.c
new file mode 100644
index 000000000..28aab8829
--- /dev/null
+++ b/lapack-netlib/SRC/sgetc2.c
@@ -0,0 +1,645 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b10 = -1.f;
+
+/* > \brief \b SGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGETC2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetc2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetc2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetc2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, N */
+/*       INTEGER            IPIV( * ), JPIV( * ) */
+/*       REAL               A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETC2 computes an LU factorization with complete pivoting of the */
+/* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */
+/* > where P and Q are permutation matrices, L is lower triangular with */
+/* > unit diagonal elements and U is upper triangular. */
+/* > */
+/* > This is the Level 2 BLAS algorithm. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the n-by-n matrix A to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U*Q; the unit diagonal elements of L are not stored. */
+/* >          If U(k, k) appears to be less than SMIN, U(k, k) is given the */
+/* >          value of SMIN, i.e., giving a nonsingular perturbed system. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension(N). */
+/* >          The pivot indices; for 1 <= i <= N, row i of the */
+/* >          matrix has been interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] JPIV */
+/* > \verbatim */
+/* >          JPIV is INTEGER array, dimension(N). */
+/* >          The pivot indices; for 1 <= j <= N, column j of the */
+/* >          matrix has been interchanged with column JPIV(j). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >           = 0: successful exit */
+/* >           > 0: if INFO = k, U(k, k) is likely to produce overflow if */
+/* >                we try to solve for x in Ax = b. So U is perturbed to */
+/* >                avoid the overflow. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEauxiliary */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
+/* >     Umea University, S-901 87 Umea, Sweden. */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgetc2_(integer *n, real *a, integer *lda, integer *ipiv,
+	 integer *jpiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
+	    integer *, real *, integer *, real *, integer *);
+    real smin, xmax;
+    integer i__, j;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *), slabad_(real *, real *);
+    integer ip, jp;
+    extern real slamch_(char *);
+    real bignum, smlnum, eps;
+    integer ipv, jpv;
+
+
+/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+    --jpiv;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Set constants to control overflow */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S") / eps;
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Handle the case N=1 by itself */
+
+    if (*n == 1) {
+	ipiv[1] = 1;
+	jpiv[1] = 1;
+	if ((r__1 = a[a_dim1 + 1], abs(r__1)) < smlnum) {
+	    *info = 1;
+	    a[a_dim1 + 1] = smlnum;
+	}
+	return 0;
+    }
+
+/*     Factorize A using complete pivoting. */
+/*     Set pivots less than SMIN to SMIN. */
+
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Find f2cmax element in matrix A */
+
+	xmax = 0.f;
+	i__2 = *n;
+	for (ip = i__; ip <= i__2; ++ip) {
+	    i__3 = *n;
+	    for (jp = i__; jp <= i__3; ++jp) {
+		if ((r__1 = a[ip + jp * a_dim1], abs(r__1)) >= xmax) {
+		    xmax = (r__1 = a[ip + jp * a_dim1], abs(r__1));
+		    ipv = ip;
+		    jpv = jp;
+		}
+/* L10: */
+	    }
+/* L20: */
+	}
+	if (i__ == 1) {
+/* Computing MAX */
+	    r__1 = eps * xmax;
+	    smin = f2cmax(r__1,smlnum);
+	}
+
+/*        Swap rows */
+
+	if (ipv != i__) {
+	    sswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda);
+	}
+	ipiv[i__] = ipv;
+
+/*        Swap columns */
+
+	if (jpv != i__) {
+	    sswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], &
+		    c__1);
+	}
+	jpiv[i__] = jpv;
+
+/*        Check for singularity */
+
+	if ((r__1 = a[i__ + i__ * a_dim1], abs(r__1)) < smin) {
+	    *info = i__;
+	    a[i__ + i__ * a_dim1] = smin;
+	}
+	i__2 = *n;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1];
+/* L30: */
+	}
+	i__2 = *n - i__;
+	i__3 = *n - i__;
+	sger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ 
+		+ (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], 
+		lda);
+/* L40: */
+    }
+
+    if ((r__1 = a[*n + *n * a_dim1], abs(r__1)) < smin) {
+	*info = *n;
+	a[*n + *n * a_dim1] = smin;
+    }
+
+/*     Set last pivots to N */
+
+    ipiv[*n] = *n;
+    jpiv[*n] = *n;
+
+    return 0;
+
+/*     End of SGETC2 */
+
+} /* sgetc2_ */
+
diff --git a/lapack-netlib/SRC/sgetf2.c b/lapack-netlib/SRC/sgetf2.c
new file mode 100644
index 000000000..1bb1cc839
--- /dev/null
+++ b/lapack-netlib/SRC/sgetf2.c
@@ -0,0 +1,618 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b8 = -1.f;
+
+/* > \brief \b SGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row
+ interchanges (unblocked algorithm). */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGETF2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetf2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetf2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetf2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETF2 computes an LU factorization of a general m-by-n matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > The factorization has the form */
+/* >    A = P * L * U */
+/* > where P is a permutation matrix, L is lower triangular with unit */
+/* > diagonal elements (lower trapezoidal if m > n), and U is upper */
+/* > triangular (upper trapezoidal if m < n). */
+/* > */
+/* > This is the right-looking Level 2 BLAS version of the algorithm. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
+/* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -k, the k-th argument had an illegal value */
+/* >          > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
+/* >               has been completed, but the factor U is exactly */
+/* >               singular, and division by zero will occur if it is used */
+/* >               to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, 
+	integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
+	    integer *, real *, integer *, real *, integer *);
+    integer i__, j;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    real sfmin;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *);
+    integer jp;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer isamax_(integer *, real *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETF2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Compute machine safe minimum */
+
+    sfmin = slamch_("S");
+
+    i__1 = f2cmin(*m,*n);
+    for (j = 1; j <= i__1; ++j) {
+
+/*        Find pivot and test for singularity. */
+
+	i__2 = *m - j + 1;
+	jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1);
+	ipiv[j] = jp;
+	if (a[jp + j * a_dim1] != 0.f) {
+
+/*           Apply the interchange to columns 1:N. */
+
+	    if (jp != j) {
+		sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
+	    }
+
+/*           Compute elements J+1:M of J-th column. */
+
+	    if (j < *m) {
+		if ((r__1 = a[j + j * a_dim1], abs(r__1)) >= sfmin) {
+		    i__2 = *m - j;
+		    r__1 = 1.f / a[j + j * a_dim1];
+		    sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1);
+		} else {
+		    i__2 = *m - j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
+/* L20: */
+		    }
+		}
+	    }
+
+	} else if (*info == 0) {
+
+	    *info = j;
+	}
+
+	if (j < f2cmin(*m,*n)) {
+
+/*           Update trailing submatrix. */
+
+	    i__2 = *m - j;
+	    i__3 = *n - j;
+	    sger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + (
+		    j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda);
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGETF2 */
+
+} /* sgetf2_ */
+
diff --git a/lapack-netlib/SRC/sgetrf.c b/lapack-netlib/SRC/sgetrf.c
new file mode 100644
index 000000000..f4f7a4977
--- /dev/null
+++ b/lapack-netlib/SRC/sgetrf.c
@@ -0,0 +1,643 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static real c_b16 = 1.f;
+static real c_b19 = -1.f;
+
+/* > \brief \b SGETRF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGETRF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetrf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetrf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetrf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETRF computes an LU factorization of a general M-by-N matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > The factorization has the form */
+/* >    A = P * L * U */
+/* > where P is a permutation matrix, L is lower triangular with unit */
+/* > diagonal elements (lower trapezoidal if m > n), and U is upper */
+/* > triangular (upper trapezoidal if m < n). */
+/* > */
+/* > This is the right-looking Level 3 BLAS version of the algorithm. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
+/* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, and division by zero will occur if it is used */
+/* >                to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, 
+	integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    integer i__, j, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), strsm_(char *, char *, char *,
+	     char *, integer *, integer *, real *, real *, integer *, real *, 
+	    integer *);
+    integer jb, nb;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
+	    *, integer *, integer *, integer *), sgetrf2_(integer *, integer *
+	    , real *, integer *, integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETRF", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    if (nb <= 1 || nb >= f2cmin(*m,*n)) {
+
+/*        Use unblocked code. */
+
+	sgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code. */
+
+	i__1 = f2cmin(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = f2cmin(*m,*n) - j + 1;
+	    jb = f2cmin(i__3,nb);
+
+/*           Factor diagonal and subdiagonal blocks and test for exact */
+/*           singularity. */
+
+	    i__3 = *m - j + 1;
+	    sgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
+
+/*           Adjust INFO and the pivot indices. */
+
+	    if (*info == 0 && iinfo > 0) {
+		*info = iinfo + j - 1;
+	    }
+/* Computing MIN */
+	    i__4 = *m, i__5 = j + jb - 1;
+	    i__3 = f2cmin(i__4,i__5);
+	    for (i__ = j; i__ <= i__3; ++i__) {
+		ipiv[i__] = j - 1 + ipiv[i__];
+/* L10: */
+	    }
+
+/*           Apply interchanges to columns 1:J-1. */
+
+	    i__3 = j - 1;
+	    i__4 = j + jb - 1;
+	    slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
+
+	    if (j + jb <= *n) {
+
+/*              Apply interchanges to columns J+JB:N. */
+
+		i__3 = *n - j - jb + 1;
+		i__4 = j + jb - 1;
+		slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
+			ipiv[1], &c__1);
+
+/*              Compute block row of U. */
+
+		i__3 = *n - j - jb + 1;
+		strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
+			c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * 
+			a_dim1], lda);
+		if (j + jb <= *m) {
+
+/*                 Update trailing submatrix. */
+
+		    i__3 = *m - j - jb + 1;
+		    i__4 = *n - j - jb + 1;
+		    sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, 
+			    &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + 
+			    jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) *
+			     a_dim1], lda);
+		}
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of SGETRF */
+
+} /* sgetrf_ */
+
diff --git a/lapack-netlib/SRC/sgetrf2.c b/lapack-netlib/SRC/sgetrf2.c
new file mode 100644
index 000000000..77c7ecacc
--- /dev/null
+++ b/lapack-netlib/SRC/sgetrf2.c
@@ -0,0 +1,681 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b13 = 1.f;
+static real c_b16 = -1.f;
+
+/* > \brief \b SGETRF2 */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/*  Definition: */
+/*  =========== */
+
+/*        SUBROUTINE SGETRF2( M, N, A, LDA, IPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETRF2 computes an LU factorization of a general M-by-N matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > The factorization has the form */
+/* >    A = P * L * U */
+/* > where P is a permutation matrix, L is lower triangular with unit */
+/* > diagonal elements (lower trapezoidal if m > n), and U is upper */
+/* > triangular (upper trapezoidal if m < n). */
+/* > */
+/* > This is the recursive version of the algorithm. It divides */
+/* > the matrix into four submatrices: */
+/* > */
+/* >        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is n2 by n2 */
+/* >    A = [ -----|----- ]  with n1 = f2cmin(m,n)/2 */
+/* >        [  A21 | A22  ]       n2 = n-n1 */
+/* > */
+/* >                                       [ A11 ] */
+/* > The subroutine calls itself to factor [ --- ], */
+/* >                                       [ A12 ] */
+/* >                 [ A12 ] */
+/* > do the swaps on [ --- ], solve A12, update A22, */
+/* >                 [ A22 ] */
+/* > */
+/* > then calls itself to factor A22 and do the swaps on A21. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
+/* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, and division by zero will occur if it is used */
+/* >                to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgetrf2_(integer *m, integer *n, real *a, integer *lda, 
+	integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    real temp;
+    integer i__, iinfo;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
+	    sgemm_(char *, char *, integer *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    real sfmin;
+    integer n1, n2;
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    );
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
+	    *, integer *, integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETRF2", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+    if (*m == 1) {
+
+/*        Use unblocked code for one row case */
+/*        Just need to handle IPIV and INFO */
+
+	ipiv[1] = 1;
+	if (a[a_dim1 + 1] == 0.f) {
+	    *info = 1;
+	}
+
+    } else if (*n == 1) {
+
+/*        Use unblocked code for one column case */
+
+
+/*        Compute machine safe minimum */
+
+	sfmin = slamch_("S");
+
+/*        Find pivot and test for singularity */
+
+	i__ = isamax_(m, &a[a_dim1 + 1], &c__1);
+	ipiv[1] = i__;
+	if (a[i__ + a_dim1] != 0.f) {
+
+/*           Apply the interchange */
+
+	    if (i__ != 1) {
+		temp = a[a_dim1 + 1];
+		a[a_dim1 + 1] = a[i__ + a_dim1];
+		a[i__ + a_dim1] = temp;
+	    }
+
+/*           Compute elements 2:M of the column */
+
+	    if ((r__1 = a[a_dim1 + 1], abs(r__1)) >= sfmin) {
+		i__1 = *m - 1;
+		r__1 = 1.f / a[a_dim1 + 1];
+		sscal_(&i__1, &r__1, &a[a_dim1 + 2], &c__1);
+	    } else {
+		i__1 = *m - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
+/* L10: */
+		}
+	    }
+
+	} else {
+	    *info = 1;
+	}
+
+    } else {
+
+/*        Use recursive code */
+
+	n1 = f2cmin(*m,*n) / 2;
+	n2 = *n - n1;
+
+/*               [ A11 ] */
+/*        Factor [ --- ] */
+/*               [ A21 ] */
+
+	sgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
+	if (*info == 0 && iinfo > 0) {
+	    *info = iinfo;
+	}
+
+/*                              [ A12 ] */
+/*        Apply interchanges to [ --- ] */
+/*                              [ A22 ] */
+
+	slaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &
+		c__1);
+
+/*        Solve A12 */
+
+	strsm_("L", "L", "N", "U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(
+		n1 + 1) * a_dim1 + 1], lda);
+
+/*        Update A22 */
+
+	i__1 = *m - n1;
+	sgemm_("N", "N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda, &
+		a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * 
+		a_dim1], lda);
+
+/*        Factor A22 */
+
+	i__1 = *m - n1;
+	sgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 
+		1], &iinfo);
+
+/*        Adjust INFO and the pivot indices */
+
+	if (*info == 0 && iinfo > 0) {
+	    *info = iinfo + n1;
+	}
+	i__1 = f2cmin(*m,*n);
+	for (i__ = n1 + 1; i__ <= i__1; ++i__) {
+	    ipiv[i__] += n1;
+/* L20: */
+	}
+
+/*        Apply interchanges to A21 */
+
+	i__1 = n1 + 1;
+	i__2 = f2cmin(*m,*n);
+	slaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);
+
+    }
+    return 0;
+
+/*     End of SGETRF2 */
+
+} /* sgetrf2_ */
+
diff --git a/lapack-netlib/SRC/sgetri.c b/lapack-netlib/SRC/sgetri.c
new file mode 100644
index 000000000..003d72975
--- /dev/null
+++ b/lapack-netlib/SRC/sgetri.c
@@ -0,0 +1,690 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+static real c_b20 = -1.f;
+static real c_b22 = 1.f;
+
+/* > \brief \b SGETRI */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGETRI + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetri.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetri.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetri.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, N */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               A( LDA, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETRI computes the inverse of a matrix using the LU factorization */
+/* > computed by SGETRF. */
+/* > */
+/* > This method inverts U and then computes inv(A) by solving the system */
+/* > inv(A)*L = inv(U) for inv(A). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the factors L and U from the factorization */
+/* >          A = P*L*U as computed by SGETRF. */
+/* >          On exit, if INFO = 0, the inverse of the original matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices from SGETRF; for 1<=i<=N, row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
+/* >          For optimal performance LWORK >= N*NB, where NB is */
+/* >          the optimal blocksize returned by ILAENV. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is */
+/* >                singular and its inverse could not be computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgetri_(integer *n, real *a, integer *lda, integer *ipiv,
+	 real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__, j, nbmin;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), sgemv_(char *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), sswap_(integer *, real *, integer *, 
+	    real *, integer *), strsm_(char *, char *, char *, char *, 
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    );
+    integer jb, nb, jj, jp, nn;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int strtri_(char *, char *, integer *, real *, 
+	    integer *, integer *);
+    integer iws;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    lwkopt = *n * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -3;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETRI", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Form inv(U).  If INFO > 0 from STRTRI, then U is singular, */
+/*     and the inverse is not computed. */
+
+    strtri_("Upper", "Non-unit", n, &a[a_offset], lda, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = *n;
+    if (nb > 1 && nb < *n) {
+/* Computing MAX */
+	i__1 = ldwork * nb;
+	iws = f2cmax(i__1,1);
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/* Computing MAX */
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "SGETRI", " ", n, &c_n1, &c_n1, &
+		    c_n1, (ftnlen)6, (ftnlen)1);
+	    nbmin = f2cmax(i__1,i__2);
+	}
+    } else {
+	iws = *n;
+    }
+
+/*     Solve the equation inv(A)*L = inv(U) for inv(A). */
+
+    if (nb < nbmin || nb >= *n) {
+
+/*        Use unblocked code. */
+
+	for (j = *n; j >= 1; --j) {
+
+/*           Copy current column of L to WORK and replace with zeros. */
+
+	    i__1 = *n;
+	    for (i__ = j + 1; i__ <= i__1; ++i__) {
+		work[i__] = a[i__ + j * a_dim1];
+		a[i__ + j * a_dim1] = 0.f;
+/* L10: */
+	    }
+
+/*           Compute current column of inv(A). */
+
+	    if (j < *n) {
+		i__1 = *n - j;
+		sgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 
+			+ 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 
+			+ 1], &c__1);
+	    }
+/* L20: */
+	}
+    } else {
+
+/*        Use blocked code. */
+
+	nn = (*n - 1) / nb * nb + 1;
+	i__1 = -nb;
+	for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *n - j + 1;
+	    jb = f2cmin(i__2,i__3);
+
+/*           Copy current block column of L to WORK and replace with */
+/*           zeros. */
+
+	    i__2 = j + jb - 1;
+	    for (jj = j; jj <= i__2; ++jj) {
+		i__3 = *n;
+		for (i__ = jj + 1; i__ <= i__3; ++i__) {
+		    work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
+		    a[i__ + jj * a_dim1] = 0.f;
+/* L30: */
+		}
+/* L40: */
+	    }
+
+/*           Compute current block column of inv(A). */
+
+	    if (j + jb <= *n) {
+		i__2 = *n - j - jb + 1;
+		sgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, 
+			&a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &
+			ldwork, &c_b22, &a[j * a_dim1 + 1], lda);
+	    }
+	    strsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, &
+		    work[j], &ldwork, &a[j * a_dim1 + 1], lda);
+/* L50: */
+	}
+    }
+
+/*     Apply column interchanges. */
+
+    for (j = *n - 1; j >= 1; --j) {
+	jp = ipiv[j];
+	if (jp != j) {
+	    sswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
+	}
+/* L60: */
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGETRI */
+
+} /* sgetri_ */
+
diff --git a/lapack-netlib/SRC/sgetrs.c b/lapack-netlib/SRC/sgetrs.c
new file mode 100644
index 000000000..b27fbabba
--- /dev/null
+++ b/lapack-netlib/SRC/sgetrs.c
@@ -0,0 +1,619 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b12 = 1.f;
+static integer c_n1 = -1;
+
+/* > \brief \b SGETRS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGETRS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetrs.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetrs.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetrs.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               A( LDA, * ), B( LDB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETRS solves a system of linear equations */
+/* >    A * X = B  or  A**T * X = B */
+/* > with a general N-by-N matrix A using the LU factorization computed */
+/* > by SGETRF. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B  (No transpose) */
+/* >          = 'T':  A**T* X = B  (Transpose) */
+/* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          The factors L and U from the factorization A = P*L*U */
+/* >          as computed by SGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices from SGETRF; for 1<=i<=N, row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the right hand side matrix B. */
+/* >          On exit, the solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, 
+	integer *lda, integer *ipiv, real *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), xerbla_(char *, integer *, ftnlen);
+    logical notran;
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer 
+	    *, integer *, integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETRS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	return 0;
+    }
+
+    if (notran) {
+
+/*        Solve A * X = B. */
+
+/*        Apply row interchanges to the right hand sides. */
+
+	slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
+
+/*        Solve L*X = B, overwriting B with X. */
+
+	strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, &
+		a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*        Solve A**T * X = B. */
+
+/*        Solve U**T *X = B, overwriting B with X. */
+
+	strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L**T *X = B, overwriting B with X. */
+
+	strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Apply row interchanges to the solution vectors. */
+
+	slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
+    }
+
+    return 0;
+
+/*     End of SGETRS */
+
+} /* sgetrs_ */
+
diff --git a/lapack-netlib/SRC/sgetsls.c b/lapack-netlib/SRC/sgetsls.c
new file mode 100644
index 000000000..e2bfef09e
--- /dev/null
+++ b/lapack-netlib/SRC/sgetsls.c
@@ -0,0 +1,929 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c_n2 = -2;
+static real c_b23 = 0.f;
+static integer c__0 = 0;
+
+/* > \brief \b SGETSLS */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */
+/*     $                     WORK, LWORK, INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
+/*       REAL               A( LDA, * ), B( LDB, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETSLS solves overdetermined or underdetermined real linear systems */
+/* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */
+/* > factorization of A.  It is assumed that A has full rank. */
+/* > */
+/* > */
+/* > */
+/* > The following options are provided: */
+/* > */
+/* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A*X ||. */
+/* > */
+/* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
+/* >    an underdetermined system A * X = B. */
+/* > */
+/* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
+/* >    an undetermined system A**T * X = B. */
+/* > */
+/* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A**T * X ||. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
+/* > matrix X. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N': the linear system involves A; */
+/* >          = 'T': the linear system involves A**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of */
+/* >          columns of the matrices B and X. NRHS >=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          A is overwritten by details of its QR or LQ */
+/* >          factorization as returned by SGEQR or SGELQ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the matrix B of right hand side vectors, stored */
+/* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
+/* >          if TRANS = 'T'. */
+/* >          On exit, if INFO = 0, B is overwritten by the solution */
+/* >          vectors, stored columnwise: */
+/* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
+/* >          squares solution vectors. */
+/* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
+/* >          least squares solution vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
+/* >          or optimal, if query was assumed) LWORK. */
+/* >          See LWORK for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1 or -2, then a workspace query is assumed. */
+/* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
+/* >          optimal performance and returns this value in WORK(1). */
+/* >          If LWORK = -2, the routine calculates minimal size of WORK and */
+/* >          returns this value in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO =  i, the i-th diagonal element of the */
+/* >                triangular factor of A is zero, so that A does not have */
+/* >                full rank; the least squares solution could not be */
+/* >                computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgetsls_(char *trans, integer *m, integer *n, integer *
+	nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, 
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    real anrm, bnrm;
+    logical tran;
+    integer brow, tszm, tszo, info2, i__, j, iascl, ibscl;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgelq_(integer *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, integer *);
+    integer minmn, maxmn;
+    extern /* Subroutine */ int sgeqr_(integer *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, integer *);
+    real workq[1];
+    extern /* Subroutine */ int slabad_(real *, real *);
+    real tq[5];
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    integer scllen;
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *), sgemlq_(char *, char *, integer *, integer *, integer *, 
+	    real *, integer *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *), slaset_(char *, integer *, 
+	    integer *, real *, real *, real *, integer *), sgemqr_(
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, real *, integer *, 
+	    integer *);
+    real smlnum;
+    integer wsizem, wsizeo;
+    logical lquery;
+    integer lw1, lw2;
+    extern /* Subroutine */ int strtrs_(char *, char *, char *, integer *, 
+	    integer *, real *, integer *, real *, integer *, integer *);
+    integer mnk, lwm, lwo;
+
+
+/*  -- LAPACK driver routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    maxmn = f2cmax(*m,*n);
+    mnk = f2cmax(minmn,*nrhs);
+    tran = lsame_(trans, "T");
+
+    lquery = *lwork == -1 || *lwork == -2;
+    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m);
+	if (*ldb < f2cmax(i__1,*n)) {
+	    *info = -8;
+	}
+    }
+
+    if (*info == 0) {
+
+/*     Determine the block size and minimum LWORK */
+
+	if (*m >= *n) {
+	    sgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
+	    tszo = (integer) tq[0];
+	    lwo = (integer) workq[0];
+	    sgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwo, i__2 = (integer) workq[0];
+	    lwo = f2cmax(i__1,i__2);
+	    sgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
+	    tszm = (integer) tq[0];
+	    lwm = (integer) workq[0];
+	    sgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwm, i__2 = (integer) workq[0];
+	    lwm = f2cmax(i__1,i__2);
+	    wsizeo = tszo + lwo;
+	    wsizem = tszm + lwm;
+	} else {
+	    sgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
+	    tszo = (integer) tq[0];
+	    lwo = (integer) workq[0];
+	    sgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwo, i__2 = (integer) workq[0];
+	    lwo = f2cmax(i__1,i__2);
+	    sgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
+	    tszm = (integer) tq[0];
+	    lwm = (integer) workq[0];
+	    sgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwm, i__2 = (integer) workq[0];
+	    lwm = f2cmax(i__1,i__2);
+	    wsizeo = tszo + lwo;
+	    wsizem = tszm + lwm;
+	}
+
+	if (*lwork < wsizem && ! lquery) {
+	    *info = -10;
+	}
+
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETSLS", &i__1, (ftnlen)7);
+	work[1] = (real) wsizeo;
+	return 0;
+    }
+    if (lquery) {
+	if (*lwork == -1) {
+	    work[1] = (real) wsizeo;
+	}
+	if (*lwork == -2) {
+	    work[1] = (real) wsizem;
+	}
+	return 0;
+    }
+    if (*lwork < wsizeo) {
+	lw1 = tszm;
+	lw2 = lwm;
+    } else {
+	lw1 = tszo;
+	lw2 = lwo;
+    }
+
+/*     Quick return if possible */
+
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*nrhs) == 0) {
+	i__1 = f2cmax(*m,*n);
+	slaset_("FULL", &i__1, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = slamch_("S") / slamch_("P");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.f) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	slaset_("F", &maxmn, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
+	goto L50;
+    }
+
+    brow = *m;
+    if (tran) {
+	brow = *n;
+    }
+    bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0.f && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 2;
+    }
+
+    if (*m >= *n) {
+
+/*        compute QR factorization of A */
+
+	sgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
+		info);
+	if (! tran) {
+
+/*           Least-Squares Problem f2cmin || A * X - B || */
+
+/*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
+
+	    sgemqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+/*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */
+
+	    strtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
+		    ldb, info);
+	    if (*info > 0) {
+		return 0;
+	    }
+	    scllen = *n;
+	} else {
+
+/*           Overdetermined system of equations A**T * X = B */
+
+/*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */
+
+	    strtrs_("U", "T", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
+		    ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(N+1:M,1:NRHS) = ZERO */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = *n + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.f;
+/* L10: */
+		}
+/* L20: */
+	    }
+
+/*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */
+
+	    sgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+	    scllen = *m;
+
+	}
+
+    } else {
+
+/*        Compute LQ factorization of A */
+
+	sgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
+		info);
+
+/*        workspace at least M, optimally M*NB. */
+
+	if (! tran) {
+
+/*           underdetermined system of equations A * X = B */
+
+/*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */
+
+	    strtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], 
+		    ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(M+1:N,1:NRHS) = 0 */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = *m + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.f;
+/* L30: */
+		}
+/* L40: */
+	    }
+
+/*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */
+
+	    sgemlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+	    scllen = *n;
+
+	} else {
+
+/*           overdetermined system f2cmin || A**T * X - B || */
+
+/*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */
+
+	    sgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+/*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */
+
+	    strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
+		    lda, &b[b_offset], ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+	    scllen = *m;
+
+	}
+
+    }
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (iascl == 2) {
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+    if (ibscl == 1) {
+	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (ibscl == 2) {
+	slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+
+L50:
+    work[1] = (real) (tszo + lwo);
+    return 0;
+
+/*     End of SGETSLS */
+
+} /* sgetsls_ */
+
diff --git a/lapack-netlib/SRC/sgetsqrhrt.c b/lapack-netlib/SRC/sgetsqrhrt.c
new file mode 100644
index 000000000..29095f099
--- /dev/null
+++ b/lapack-netlib/SRC/sgetsqrhrt.c
@@ -0,0 +1,765 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGETSQRHRT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGETSQRHRT + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgetsqr
+hrt.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgetsqr
+hrt.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgetsqr
+hrt.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, */
+/*      $                       LWORK, INFO ) */
+/*       IMPLICIT NONE */
+
+/*       INTEGER           INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 */
+/*       REAL              A( LDA, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGETSQRHRT computes a NB2-sized column blocked QR-factorization */
+/* > of a complex M-by-N matrix A with M >= N, */
+/* > */
+/* >    A = Q * R. */
+/* > */
+/* > The routine uses internally a NB1-sized column blocked and MB1-sized */
+/* > row blocked TSQR-factorization and perfors the reconstruction */
+/* > of the Householder vectors from the TSQR output. The routine also */
+/* > converts the R_tsqr factor from the TSQR-factorization output into */
+/* > the R factor that corresponds to the Householder QR-factorization, */
+/* > */
+/* >    A = Q_tsqr * R_tsqr = Q * R. */
+/* > */
+/* > The output Q and R factors are stored in the same format as in SGEQRT */
+/* > (Q is in blocked compact WY-representation). See the documentation */
+/* > of SGEQRT for more details on the format. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A. M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MB1 */
+/* > \verbatim */
+/* >          MB1 is INTEGER */
+/* >          The row block size to be used in the blocked TSQR. */
+/* >          MB1 > N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NB1 */
+/* > \verbatim */
+/* >          NB1 is INTEGER */
+/* >          The column block size to be used in the blocked TSQR. */
+/* >          N >= NB1 >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NB2 */
+/* > \verbatim */
+/* >          NB2 is INTEGER */
+/* >          The block size to be used in the blocked QR that is */
+/* >          output. NB2 >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* > */
+/* >          On entry: an M-by-N matrix A. */
+/* > */
+/* >          On exit: */
+/* >           a) the elements on and above the diagonal */
+/* >              of the array contain the N-by-N upper-triangular */
+/* >              matrix R corresponding to the Householder QR; */
+/* >           b) the elements below the diagonal represent Q by */
+/* >              the columns of blocked V (compact WY-representation). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT,N)) */
+/* >          The upper triangular block reflectors stored in compact form */
+/* >          as a sequence of upper triangular blocks. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= NB2. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          (workspace) REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          The dimension of the array WORK. */
+/* >          LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), */
+/* >          where */
+/* >             NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), */
+/* >             NB1LOCAL = MIN(NB1,N). */
+/* >             LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, */
+/* >             LW1 = NB1LOCAL * N, */
+/* >             LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), */
+/* >          If LWORK = -1, then a workspace query is assumed. */
+/* >          The routine only calculates the optimal size of the WORK */
+/* >          array, returns this value as the first entry of the WORK */
+/* >          array, and no error message related to LWORK is issued */
+/* >          by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \ingroup singleOTHERcomputational */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > November 2020, Igor Kozachenko, */
+/* >                Computer Science Division, */
+/* >                University of California, Berkeley */
+/* > */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgetsqrhrt_(integer *m, integer *n, integer *mb1, 
+	integer *nb1, integer *nb2, real *a, integer *lda, real *t, integer *
+	ldt, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4;
+    real r__1, r__2, r__3;
+
+    /* Local variables */
+    integer ldwt, lworkopt, i__, j, iinfo;
+    extern /* Subroutine */ int sorgtsqr_row_(integer *, integer *, integer *
+	    , integer *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *), scopy_(integer *, real *, integer *, real *
+	    , integer *), sorhr_col_(integer *, integer *, integer *, real *,
+	     integer *, real *, integer *, real *, integer *), xerbla_(char *,
+	     integer *, ftnlen);
+    logical lquery;
+    integer lw1, lw2, num_all_row_blocks__, lwt;
+    extern /* Subroutine */ int slatsqr_(integer *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , integer *);
+    integer nb1local, nb2local;
+
+
+/*  -- LAPACK computational routine -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0 || *m < *n) {
+	*info = -2;
+    } else if (*mb1 <= *n) {
+	*info = -3;
+    } else if (*nb1 < 1) {
+	*info = -4;
+    } else if (*nb2 < 1) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -7;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = f2cmin(*nb2,*n);
+	if (*ldt < f2cmax(i__1,i__2)) {
+	    *info = -9;
+	} else {
+
+/*        Test the input LWORK for the dimension of the array WORK. */
+/*        This workspace is used to store array: */
+/*        a) Matrix T and WORK for SLATSQR; */
+/*        b) N-by-N upper-triangular factor R_tsqr; */
+/*        c) Matrix T and array WORK for SORGTSQR_ROW; */
+/*        d) Diagonal D for SORHR_COL. */
+
+	    if (*lwork < *n * *n + 1 && ! lquery) {
+		*info = -11;
+	    } else {
+
+/*           Set block size for column blocks */
+
+		nb1local = f2cmin(*nb1,*n);
+
+/* Computing MAX */
+		r__3 = (real) (*m - *n) / (real) (*mb1 - *n) + .5f;
+		r__1 = 1.f, r__2 = r_int(&r__3);
+		num_all_row_blocks__ = f2cmax(r__1,r__2);
+
+/*           Length and leading dimension of WORK array to place */
+/*           T array in TSQR. */
+
+		lwt = num_all_row_blocks__ * *n * nb1local;
+		ldwt = nb1local;
+
+/*           Length of TSQR work array */
+
+		lw1 = nb1local * *n;
+
+/*           Length of SORGTSQR_ROW work array. */
+
+/* Computing MAX */
+		i__1 = nb1local, i__2 = *n - nb1local;
+		lw2 = nb1local * f2cmax(i__1,i__2);
+
+/* Computing MAX */
+/* Computing MAX */
+		i__3 = lwt + *n * *n + lw2, i__4 = lwt + *n * *n + *n;
+		i__1 = lwt + lw1, i__2 = f2cmax(i__3,i__4);
+		lworkopt = f2cmax(i__1,i__2);
+
+		if (*lwork < f2cmax(1,lworkopt) && ! lquery) {
+		    *info = -11;
+		}
+
+	    }
+	}
+    }
+
+/*     Handle error in the input parameters and return workspace query. */
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETSQRHRT", &i__1, (ftnlen)10);
+	return 0;
+    } else if (lquery) {
+	work[1] = (real) lworkopt;
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (f2cmin(*m,*n) == 0) {
+	work[1] = (real) lworkopt;
+	return 0;
+    }
+
+    nb2local = f2cmin(*nb2,*n);
+
+
+/*     (1) Perform TSQR-factorization of the M-by-N matrix A. */
+
+    slatsqr_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &work[
+	    lwt + 1], &lw1, &iinfo);
+
+/*     (2) Copy the factor R_tsqr stored in the upper-triangular part */
+/*         of A into the square matrix in the work array */
+/*         WORK(LWT+1:LWT+N*N) column-by-column. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	scopy_(&j, &a[j * a_dim1 + 1], &c__1, &work[lwt + *n * (j - 1) + 1], &
+		c__1);
+    }
+
+/*     (3) Generate a M-by-N matrix Q with orthonormal columns from */
+/*     the result stored below the diagonal in the array A in place. */
+
+    sorgtsqr_row_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &
+	    work[lwt + *n * *n + 1], &lw2, &iinfo);
+
+/*     (4) Perform the reconstruction of Householder vectors from */
+/*     the matrix Q (stored in A) in place. */
+
+    sorhr_col_(m, n, &nb2local, &a[a_offset], lda, &t[t_offset], ldt, &work[
+	    lwt + *n * *n + 1], &iinfo);
+
+/*     (5) Copy the factor R_tsqr stored in the square matrix in the */
+/*     work array WORK(LWT+1:LWT+N*N) into the upper-triangular */
+/*     part of A. */
+
+/*     (6) Compute from R_tsqr the factor R_hr corresponding to */
+/*     the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. */
+/*     This multiplication by the sign matrix S on the left means */
+/*     changing the sign of I-th row of the matrix R_tsqr according */
+/*     to sign of the I-th diagonal element DIAG(I) of the matrix S. */
+/*     DIAG is stored in WORK( LWT+N*N+1 ) from the SORHR_COL output. */
+
+/*     (5) and (6) can be combined in a single loop, so the rows in A */
+/*     are accessed only once. */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (work[lwt + *n * *n + i__] == -1.f) {
+	    i__2 = *n;
+	    for (j = i__; j <= i__2; ++j) {
+		a[i__ + j * a_dim1] = work[lwt + *n * (j - 1) + i__] * -1.f;
+	    }
+	} else {
+	    i__2 = *n - i__ + 1;
+	    scopy_(&i__2, &work[lwt + *n * (i__ - 1) + i__], n, &a[i__ + i__ *
+		     a_dim1], lda);
+	}
+    }
+
+    work[1] = (real) lworkopt;
+    return 0;
+
+/*     End of SGETSQRHRT */
+
+} /* sgetsqrhrt_ */
+
diff --git a/lapack-netlib/SRC/sggbak.c b/lapack-netlib/SRC/sggbak.c
new file mode 100644
index 000000000..380f7ca82
--- /dev/null
+++ b/lapack-netlib/SRC/sggbak.c
@@ -0,0 +1,720 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGGBAK */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGBAK + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggbak.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggbak.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggbak.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, */
+/*                          LDV, INFO ) */
+
+/*       CHARACTER          JOB, SIDE */
+/*       INTEGER            IHI, ILO, INFO, LDV, M, N */
+/*       REAL               LSCALE( * ), RSCALE( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGBAK forms the right or left eigenvectors of a real generalized */
+/* > eigenvalue problem A*x = lambda*B*x, by backward transformation on */
+/* > the computed eigenvectors of the balanced pair of matrices output by */
+/* > SGGBAL. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          Specifies the type of backward transformation required: */
+/* >          = 'N':  do nothing, return immediately; */
+/* >          = 'P':  do backward transformation for permutation only; */
+/* >          = 'S':  do backward transformation for scaling only; */
+/* >          = 'B':  do backward transformations for both permutation and */
+/* >                  scaling. */
+/* >          JOB must be the same as the argument JOB supplied to SGGBAL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'R':  V contains right eigenvectors; */
+/* >          = 'L':  V contains left eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of rows of the matrix V.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          The integers ILO and IHI determined by SGGBAL. */
+/* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LSCALE */
+/* > \verbatim */
+/* >          LSCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and/or scaling factors applied */
+/* >          to the left side of A and B, as returned by SGGBAL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RSCALE */
+/* > \verbatim */
+/* >          RSCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and/or scaling factors applied */
+/* >          to the right side of A and B, as returned by SGGBAL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of columns of the matrix V.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,M) */
+/* >          On entry, the matrix of right or left eigenvectors to be */
+/* >          transformed, as returned by STGEVC. */
+/* >          On exit, V is overwritten by the transformed eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the matrix V. LDV >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGBcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  See R.C. Ward, Balancing the generalized eigenvalue problem, */
+/* >                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggbak_(char *job, char *side, integer *n, integer *ilo, 
+	integer *ihi, real *lscale, real *rscale, integer *m, real *v, 
+	integer *ldv, integer *info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, i__1;
+
+    /* Local variables */
+    integer i__, k;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    logical leftv;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *), xerbla_(char *, integer *, ftnlen);
+    logical rightv;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    --lscale;
+    --rscale;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+
+    /* Function Body */
+    rightv = lsame_(side, "R");
+    leftv = lsame_(side, "L");
+
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (! rightv && ! leftv) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ilo < 1) {
+	*info = -4;
+    } else if (*n == 0 && *ihi == 0 && *ilo != 1) {
+	*info = -4;
+    } else if (*n > 0 && (*ihi < *ilo || *ihi > f2cmax(1,*n))) {
+	*info = -5;
+    } else if (*n == 0 && *ilo == 1 && *ihi != 0) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -8;
+    } else if (*ldv < f2cmax(1,*n)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGBAK", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*m == 0) {
+	return 0;
+    }
+    if (lsame_(job, "N")) {
+	return 0;
+    }
+
+    if (*ilo == *ihi) {
+	goto L30;
+    }
+
+/*     Backward balance */
+
+    if (lsame_(job, "S") || lsame_(job, "B")) {
+
+/*        Backward transformation on right eigenvectors */
+
+	if (rightv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		sscal_(m, &rscale[i__], &v[i__ + v_dim1], ldv);
+/* L10: */
+	    }
+	}
+
+/*        Backward transformation on left eigenvectors */
+
+	if (leftv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		sscal_(m, &lscale[i__], &v[i__ + v_dim1], ldv);
+/* L20: */
+	    }
+	}
+    }
+
+/*     Backward permutation */
+
+L30:
+    if (lsame_(job, "P") || lsame_(job, "B")) {
+
+/*        Backward permutation on right eigenvectors */
+
+	if (rightv) {
+	    if (*ilo == 1) {
+		goto L50;
+	    }
+
+	    for (i__ = *ilo - 1; i__ >= 1; --i__) {
+		k = rscale[i__];
+		if (k == i__) {
+		    goto L40;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L40:
+		;
+	    }
+
+L50:
+	    if (*ihi == *n) {
+		goto L70;
+	    }
+	    i__1 = *n;
+	    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
+		k = rscale[i__];
+		if (k == i__) {
+		    goto L60;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L60:
+		;
+	    }
+	}
+
+/*        Backward permutation on left eigenvectors */
+
+L70:
+	if (leftv) {
+	    if (*ilo == 1) {
+		goto L90;
+	    }
+	    for (i__ = *ilo - 1; i__ >= 1; --i__) {
+		k = lscale[i__];
+		if (k == i__) {
+		    goto L80;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L80:
+		;
+	    }
+
+L90:
+	    if (*ihi == *n) {
+		goto L110;
+	    }
+	    i__1 = *n;
+	    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
+		k = lscale[i__];
+		if (k == i__) {
+		    goto L100;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L100:
+		;
+	    }
+	}
+    }
+
+L110:
+
+    return 0;
+
+/*     End of SGGBAK */
+
+} /* sggbak_ */
+
diff --git a/lapack-netlib/SRC/sggbal.c b/lapack-netlib/SRC/sggbal.c
new file mode 100644
index 000000000..0ba59666b
--- /dev/null
+++ b/lapack-netlib/SRC/sggbal.c
@@ -0,0 +1,1069 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b35 = 10.f;
+static real c_b71 = .5f;
+
+/* > \brief \b SGGBAL */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGBAL + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggbal.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggbal.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggbal.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, */
+/*                          RSCALE, WORK, INFO ) */
+
+/*       CHARACTER          JOB */
+/*       INTEGER            IHI, ILO, INFO, LDA, LDB, N */
+/*       REAL               A( LDA, * ), B( LDB, * ), LSCALE( * ), */
+/*      $                   RSCALE( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGBAL balances a pair of general real matrices (A,B).  This */
+/* > involves, first, permuting A and B by similarity transformations to */
+/* > isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N */
+/* > elements on the diagonal; and second, applying a diagonal similarity */
+/* > transformation to rows and columns ILO to IHI to make the rows */
+/* > and columns as close in norm as possible. Both steps are optional. */
+/* > */
+/* > Balancing may reduce the 1-norm of the matrices, and improve the */
+/* > accuracy of the computed eigenvalues and/or eigenvectors in the */
+/* > generalized eigenvalue problem A*x = lambda*B*x. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          Specifies the operations to be performed on A and B: */
+/* >          = 'N':  none:  simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 */
+/* >                  and RSCALE(I) = 1.0 for i = 1,...,N. */
+/* >          = 'P':  permute only; */
+/* >          = 'S':  scale only; */
+/* >          = 'B':  both permute and scale. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A and B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the input matrix A. */
+/* >          On exit,  A is overwritten by the balanced matrix. */
+/* >          If JOB = 'N', A is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,N) */
+/* >          On entry, the input matrix B. */
+/* >          On exit,  B is overwritten by the balanced matrix. */
+/* >          If JOB = 'N', B is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          ILO and IHI are set to integers such that on exit */
+/* >          A(i,j) = 0 and B(i,j) = 0 if i > j and */
+/* >          j = 1,...,ILO-1 or i = IHI+1,...,N. */
+/* >          If JOB = 'N' or 'S', ILO = 1 and IHI = N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] LSCALE */
+/* > \verbatim */
+/* >          LSCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied */
+/* >          to the left side of A and B.  If P(j) is the index of the */
+/* >          row interchanged with row j, and D(j) */
+/* >          is the scaling factor applied to row j, then */
+/* >            LSCALE(j) = P(j)    for J = 1,...,ILO-1 */
+/* >                      = D(j)    for J = ILO,...,IHI */
+/* >                      = P(j)    for J = IHI+1,...,N. */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RSCALE */
+/* > \verbatim */
+/* >          RSCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied */
+/* >          to the right side of A and B.  If P(j) is the index of the */
+/* >          column interchanged with column j, and D(j) */
+/* >          is the scaling factor applied to column j, then */
+/* >            LSCALE(j) = P(j)    for J = 1,...,ILO-1 */
+/* >                      = D(j)    for J = ILO,...,IHI */
+/* >                      = P(j)    for J = IHI+1,...,N. */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (lwork) */
+/* >          lwork must be at least f2cmax(1,6*N) when JOB = 'S' or 'B', and */
+/* >          at least 1 when JOB = 'N' or 'P'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGBcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  See R.C. WARD, Balancing the generalized eigenvalue problem, */
+/* >                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggbal_(char *job, integer *n, real *a, integer *lda, 
+	real *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real 
+	*rscale, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
+    real r__1, r__2, r__3;
+
+    /* Local variables */
+    integer lcab;
+    real beta, coef;
+    integer irab, lrab;
+    real basl, cmax;
+    extern real sdot_(integer *, real *, integer *, real *, integer *);
+    real coef2, coef5;
+    integer i__, j, k, l, m;
+    real gamma, t, alpha;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    real sfmin, sfmax;
+    integer iflow;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *);
+    integer kount;
+    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
+	    real *, integer *);
+    integer jc;
+    real ta, tb, tc;
+    integer ir, it;
+    real ew;
+    integer nr;
+    real pgamma;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer isamax_(integer *, real *, integer *);
+    integer lsfmin, lsfmax, ip1, jp1, lm1;
+    real cab, rab, ewc, cor, sum;
+    integer nrp2, icab;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --lscale;
+    --rscale;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGBAL", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	*ilo = 1;
+	*ihi = *n;
+	return 0;
+    }
+
+    if (*n == 1) {
+	*ilo = 1;
+	*ihi = *n;
+	lscale[1] = 1.f;
+	rscale[1] = 1.f;
+	return 0;
+    }
+
+    if (lsame_(job, "N")) {
+	*ilo = 1;
+	*ihi = *n;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    lscale[i__] = 1.f;
+	    rscale[i__] = 1.f;
+/* L10: */
+	}
+	return 0;
+    }
+
+    k = 1;
+    l = *n;
+    if (lsame_(job, "S")) {
+	goto L190;
+    }
+
+    goto L30;
+
+/*     Permute the matrices A and B to isolate the eigenvalues. */
+
+/*     Find row with one nonzero in columns 1 through L */
+
+L20:
+    l = lm1;
+    if (l != 1) {
+	goto L30;
+    }
+
+    rscale[1] = 1.f;
+    lscale[1] = 1.f;
+    goto L190;
+
+L30:
+    lm1 = l - 1;
+    for (i__ = l; i__ >= 1; --i__) {
+	i__1 = lm1;
+	for (j = 1; j <= i__1; ++j) {
+	    jp1 = j + 1;
+	    if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) {
+		goto L50;
+	    }
+/* L40: */
+	}
+	j = l;
+	goto L70;
+
+L50:
+	i__1 = l;
+	for (j = jp1; j <= i__1; ++j) {
+	    if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) {
+		goto L80;
+	    }
+/* L60: */
+	}
+	j = jp1 - 1;
+
+L70:
+	m = l;
+	iflow = 1;
+	goto L160;
+L80:
+	;
+    }
+    goto L100;
+
+/*     Find column with one nonzero in rows K through N */
+
+L90:
+    ++k;
+
+L100:
+    i__1 = l;
+    for (j = k; j <= i__1; ++j) {
+	i__2 = lm1;
+	for (i__ = k; i__ <= i__2; ++i__) {
+	    ip1 = i__ + 1;
+	    if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) {
+		goto L120;
+	    }
+/* L110: */
+	}
+	i__ = l;
+	goto L140;
+L120:
+	i__2 = l;
+	for (i__ = ip1; i__ <= i__2; ++i__) {
+	    if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) {
+		goto L150;
+	    }
+/* L130: */
+	}
+	i__ = ip1 - 1;
+L140:
+	m = k;
+	iflow = 2;
+	goto L160;
+L150:
+	;
+    }
+    goto L190;
+
+/*     Permute rows M and I */
+
+L160:
+    lscale[m] = (real) i__;
+    if (i__ == m) {
+	goto L170;
+    }
+    i__1 = *n - k + 1;
+    sswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda);
+    i__1 = *n - k + 1;
+    sswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb);
+
+/*     Permute columns M and J */
+
+L170:
+    rscale[m] = (real) j;
+    if (j == m) {
+	goto L180;
+    }
+    sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
+    sswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1);
+
+L180:
+    switch (iflow) {
+	case 1:  goto L20;
+	case 2:  goto L90;
+    }
+
+L190:
+    *ilo = k;
+    *ihi = l;
+
+    if (lsame_(job, "P")) {
+	i__1 = *ihi;
+	for (i__ = *ilo; i__ <= i__1; ++i__) {
+	    lscale[i__] = 1.f;
+	    rscale[i__] = 1.f;
+/* L195: */
+	}
+	return 0;
+    }
+
+    if (*ilo == *ihi) {
+	return 0;
+    }
+
+/*     Balance the submatrix in rows ILO to IHI. */
+
+    nr = *ihi - *ilo + 1;
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	rscale[i__] = 0.f;
+	lscale[i__] = 0.f;
+
+	work[i__] = 0.f;
+	work[i__ + *n] = 0.f;
+	work[i__ + (*n << 1)] = 0.f;
+	work[i__ + *n * 3] = 0.f;
+	work[i__ + (*n << 2)] = 0.f;
+	work[i__ + *n * 5] = 0.f;
+/* L200: */
+    }
+
+/*     Compute right side vector in resulting linear equations */
+
+    basl = r_lg10(&c_b35);
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	i__2 = *ihi;
+	for (j = *ilo; j <= i__2; ++j) {
+	    tb = b[i__ + j * b_dim1];
+	    ta = a[i__ + j * a_dim1];
+	    if (ta == 0.f) {
+		goto L210;
+	    }
+	    r__1 = abs(ta);
+	    ta = r_lg10(&r__1) / basl;
+L210:
+	    if (tb == 0.f) {
+		goto L220;
+	    }
+	    r__1 = abs(tb);
+	    tb = r_lg10(&r__1) / basl;
+L220:
+	    work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
+	    work[j + *n * 5] = work[j + *n * 5] - ta - tb;
+/* L230: */
+	}
+/* L240: */
+    }
+
+    coef = 1.f / (real) (nr << 1);
+    coef2 = coef * coef;
+    coef5 = coef2 * .5f;
+    nrp2 = nr + 2;
+    beta = 0.f;
+    it = 1;
+
+/*     Start generalized conjugate gradient iteration */
+
+L250:
+
+    gamma = sdot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
+	    , &c__1) + sdot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
+	    n * 5], &c__1);
+
+    ew = 0.f;
+    ewc = 0.f;
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	ew += work[i__ + (*n << 2)];
+	ewc += work[i__ + *n * 5];
+/* L260: */
+    }
+
+/* Computing 2nd power */
+    r__1 = ew;
+/* Computing 2nd power */
+    r__2 = ewc;
+/* Computing 2nd power */
+    r__3 = ew - ewc;
+    gamma = coef * gamma - coef2 * (r__1 * r__1 + r__2 * r__2) - coef5 * (
+	    r__3 * r__3);
+    if (gamma == 0.f) {
+	goto L350;
+    }
+    if (it != 1) {
+	beta = gamma / pgamma;
+    }
+    t = coef5 * (ewc - ew * 3.f);
+    tc = coef5 * (ew - ewc * 3.f);
+
+    sscal_(&nr, &beta, &work[*ilo], &c__1);
+    sscal_(&nr, &beta, &work[*ilo + *n], &c__1);
+
+    saxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
+	    c__1);
+    saxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);
+
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	work[i__] += tc;
+	work[i__ + *n] += t;
+/* L270: */
+    }
+
+/*     Apply matrix to vector */
+
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	kount = 0;
+	sum = 0.f;
+	i__2 = *ihi;
+	for (j = *ilo; j <= i__2; ++j) {
+	    if (a[i__ + j * a_dim1] == 0.f) {
+		goto L280;
+	    }
+	    ++kount;
+	    sum += work[j];
+L280:
+	    if (b[i__ + j * b_dim1] == 0.f) {
+		goto L290;
+	    }
+	    ++kount;
+	    sum += work[j];
+L290:
+	    ;
+	}
+	work[i__ + (*n << 1)] = (real) kount * work[i__ + *n] + sum;
+/* L300: */
+    }
+
+    i__1 = *ihi;
+    for (j = *ilo; j <= i__1; ++j) {
+	kount = 0;
+	sum = 0.f;
+	i__2 = *ihi;
+	for (i__ = *ilo; i__ <= i__2; ++i__) {
+	    if (a[i__ + j * a_dim1] == 0.f) {
+		goto L310;
+	    }
+	    ++kount;
+	    sum += work[i__ + *n];
+L310:
+	    if (b[i__ + j * b_dim1] == 0.f) {
+		goto L320;
+	    }
+	    ++kount;
+	    sum += work[i__ + *n];
+L320:
+	    ;
+	}
+	work[j + *n * 3] = (real) kount * work[j] + sum;
+/* L330: */
+    }
+
+    sum = sdot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) 
+	    + sdot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
+    alpha = gamma / sum;
+
+/*     Determine correction to current iteration */
+
+    cmax = 0.f;
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	cor = alpha * work[i__ + *n];
+	if (abs(cor) > cmax) {
+	    cmax = abs(cor);
+	}
+	lscale[i__] += cor;
+	cor = alpha * work[i__];
+	if (abs(cor) > cmax) {
+	    cmax = abs(cor);
+	}
+	rscale[i__] += cor;
+/* L340: */
+    }
+    if (cmax < .5f) {
+	goto L350;
+    }
+
+    r__1 = -alpha;
+    saxpy_(&nr, &r__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
+	    , &c__1);
+    r__1 = -alpha;
+    saxpy_(&nr, &r__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
+	    c__1);
+
+    pgamma = gamma;
+    ++it;
+    if (it <= nrp2) {
+	goto L250;
+    }
+
+/*     End generalized conjugate gradient iteration */
+
+L350:
+    sfmin = slamch_("S");
+    sfmax = 1.f / sfmin;
+    lsfmin = (integer) (r_lg10(&sfmin) / basl + 1.f);
+    lsfmax = (integer) (r_lg10(&sfmax) / basl);
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	i__2 = *n - *ilo + 1;
+	irab = isamax_(&i__2, &a[i__ + *ilo * a_dim1], lda);
+	rab = (r__1 = a[i__ + (irab + *ilo - 1) * a_dim1], abs(r__1));
+	i__2 = *n - *ilo + 1;
+	irab = isamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb);
+/* Computing MAX */
+	r__2 = rab, r__3 = (r__1 = b[i__ + (irab + *ilo - 1) * b_dim1], abs(
+		r__1));
+	rab = f2cmax(r__2,r__3);
+	r__1 = rab + sfmin;
+	lrab = (integer) (r_lg10(&r__1) / basl + 1.f);
+	ir = lscale[i__] + r_sign(&c_b71, &lscale[i__]);
+/* Computing MIN */
+	i__2 = f2cmax(ir,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lrab;
+	ir = f2cmin(i__2,i__3);
+	lscale[i__] = pow_ri(&c_b35, &ir);
+	icab = isamax_(ihi, &a[i__ * a_dim1 + 1], &c__1);
+	cab = (r__1 = a[icab + i__ * a_dim1], abs(r__1));
+	icab = isamax_(ihi, &b[i__ * b_dim1 + 1], &c__1);
+/* Computing MAX */
+	r__2 = cab, r__3 = (r__1 = b[icab + i__ * b_dim1], abs(r__1));
+	cab = f2cmax(r__2,r__3);
+	r__1 = cab + sfmin;
+	lcab = (integer) (r_lg10(&r__1) / basl + 1.f);
+	jc = rscale[i__] + r_sign(&c_b71, &rscale[i__]);
+/* Computing MIN */
+	i__2 = f2cmax(jc,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lcab;
+	jc = f2cmin(i__2,i__3);
+	rscale[i__] = pow_ri(&c_b35, &jc);
+/* L360: */
+    }
+
+/*     Row scaling of matrices A and B */
+
+    i__1 = *ihi;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+	i__2 = *n - *ilo + 1;
+	sscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda);
+	i__2 = *n - *ilo + 1;
+	sscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb);
+/* L370: */
+    }
+
+/*     Column scaling of matrices A and B */
+
+    i__1 = *ihi;
+    for (j = *ilo; j <= i__1; ++j) {
+	sscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1);
+	sscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1);
+/* L380: */
+    }
+
+    return 0;
+
+/*     End of SGGBAL */
+
+} /* sggbal_ */
+
diff --git a/lapack-netlib/SRC/sgges.c b/lapack-netlib/SRC/sgges.c
new file mode 100644
index 000000000..1cec32bea
--- /dev/null
+++ b/lapack-netlib/SRC/sgges.c
@@ -0,0 +1,1162 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+static real c_b38 = 0.f;
+static real c_b39 = 1.f;
+
+/* > \brief <b> SGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors f
+or GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGES + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgges.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgges.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgges.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, */
+/*                         SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, */
+/*                         LDVSR, WORK, LWORK, BWORK, INFO ) */
+
+/*       CHARACTER          JOBVSL, JOBVSR, SORT */
+/*       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */
+/*       LOGICAL            BWORK( * ) */
+/*       REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */
+/*      $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ), */
+/*      $                   VSR( LDVSR, * ), WORK( * ) */
+/*       LOGICAL            SELCTG */
+/*       EXTERNAL           SELCTG */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), */
+/* > the generalized eigenvalues, the generalized real Schur form (S,T), */
+/* > optionally, the left and/or right matrices of Schur vectors (VSL and */
+/* > VSR). This gives the generalized Schur factorization */
+/* > */
+/* >          (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) */
+/* > */
+/* > Optionally, it also orders the eigenvalues so that a selected cluster */
+/* > of eigenvalues appears in the leading diagonal blocks of the upper */
+/* > quasi-triangular matrix S and the upper triangular matrix T.The */
+/* > leading columns of VSL and VSR then form an orthonormal basis for the */
+/* > corresponding left and right eigenspaces (deflating subspaces). */
+/* > */
+/* > (If only the generalized eigenvalues are needed, use the driver */
+/* > SGGEV instead, which is faster.) */
+/* > */
+/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */
+/* > or a ratio alpha/beta = w, such that  A - w*B is singular.  It is */
+/* > usually represented as the pair (alpha,beta), as there is a */
+/* > reasonable interpretation for beta=0 or both being zero. */
+/* > */
+/* > A pair of matrices (S,T) is in generalized real Schur form if T is */
+/* > upper triangular with non-negative diagonal and S is block upper */
+/* > triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond */
+/* > to real generalized eigenvalues, while 2-by-2 blocks of S will be */
+/* > "standardized" by making the corresponding elements of T have the */
+/* > form: */
+/* >         [  a  0  ] */
+/* >         [  0  b  ] */
+/* > */
+/* > and the pair of corresponding 2-by-2 blocks in S and T will have a */
+/* > complex conjugate pair of generalized eigenvalues. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVSL */
+/* > \verbatim */
+/* >          JOBVSL is CHARACTER*1 */
+/* >          = 'N':  do not compute the left Schur vectors; */
+/* >          = 'V':  compute the left Schur vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVSR */
+/* > \verbatim */
+/* >          JOBVSR is CHARACTER*1 */
+/* >          = 'N':  do not compute the right Schur vectors; */
+/* >          = 'V':  compute the right Schur vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SORT */
+/* > \verbatim */
+/* >          SORT is CHARACTER*1 */
+/* >          Specifies whether or not to order the eigenvalues on the */
+/* >          diagonal of the generalized Schur form. */
+/* >          = 'N':  Eigenvalues are not ordered; */
+/* >          = 'S':  Eigenvalues are ordered (see SELCTG); */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SELCTG */
+/* > \verbatim */
+/* >          SELCTG is a LOGICAL FUNCTION of three REAL arguments */
+/* >          SELCTG must be declared EXTERNAL in the calling subroutine. */
+/* >          If SORT = 'N', SELCTG is not referenced. */
+/* >          If SORT = 'S', SELCTG is used to select eigenvalues to sort */
+/* >          to the top left of the Schur form. */
+/* >          An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */
+/* >          SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */
+/* >          one of a complex conjugate pair of eigenvalues is selected, */
+/* >          then both complex eigenvalues are selected. */
+/* > */
+/* >          Note that in the ill-conditioned case, a selected complex */
+/* >          eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), */
+/* >          BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 */
+/* >          in this case. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A, B, VSL, and VSR.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the first of the pair of matrices. */
+/* >          On exit, A has been overwritten by its generalized Schur */
+/* >          form S. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the second of the pair of matrices. */
+/* >          On exit, B has been overwritten by its generalized Schur */
+/* >          form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SDIM */
+/* > \verbatim */
+/* >          SDIM is INTEGER */
+/* >          If SORT = 'N', SDIM = 0. */
+/* >          If SORT = 'S', SDIM = number of eigenvalues (after sorting) */
+/* >          for which SELCTG is true.  (Complex conjugate pairs for which */
+/* >          SELCTG is true for either eigenvalue count as 2.) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAR */
+/* > \verbatim */
+/* >          ALPHAR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAI */
+/* > \verbatim */
+/* >          ALPHAI is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* >          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */
+/* >          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i, */
+/* >          and  BETA(j),j=1,...,N are the diagonals of the complex Schur */
+/* >          form (S,T) that would result if the 2-by-2 diagonal blocks of */
+/* >          the real Schur form of (A,B) were further reduced to */
+/* >          triangular form using 2-by-2 complex unitary transformations. */
+/* >          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */
+/* >          positive, then the j-th and (j+1)-st eigenvalues are a */
+/* >          complex conjugate pair, with ALPHAI(j+1) negative. */
+/* > */
+/* >          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */
+/* >          may easily over- or underflow, and BETA(j) may even be zero. */
+/* >          Thus, the user should avoid naively computing the ratio. */
+/* >          However, ALPHAR and ALPHAI will be always less than and */
+/* >          usually comparable with norm(A) in magnitude, and BETA always */
+/* >          less than and usually comparable with norm(B). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VSL */
+/* > \verbatim */
+/* >          VSL is REAL array, dimension (LDVSL,N) */
+/* >          If JOBVSL = 'V', VSL will contain the left Schur vectors. */
+/* >          Not referenced if JOBVSL = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVSL */
+/* > \verbatim */
+/* >          LDVSL is INTEGER */
+/* >          The leading dimension of the matrix VSL. LDVSL >=1, and */
+/* >          if JOBVSL = 'V', LDVSL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VSR */
+/* > \verbatim */
+/* >          VSR is REAL array, dimension (LDVSR,N) */
+/* >          If JOBVSR = 'V', VSR will contain the right Schur vectors. */
+/* >          Not referenced if JOBVSR = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVSR */
+/* > \verbatim */
+/* >          LDVSR is INTEGER */
+/* >          The leading dimension of the matrix VSR. LDVSR >= 1, and */
+/* >          if JOBVSR = 'V', LDVSR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If N = 0, LWORK >= 1, else LWORK >= f2cmax(8*N,6*N+16). */
+/* >          For good performance , LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          Not referenced if SORT = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1,...,N: */
+/* >                The QZ iteration failed.  (A,B) are not in Schur */
+/* >                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */
+/* >                be correct for j=INFO+1,...,N. */
+/* >          > N:  =N+1: other than QZ iteration failed in SHGEQZ. */
+/* >                =N+2: after reordering, roundoff changed values of */
+/* >                      some complex eigenvalues so that leading */
+/* >                      eigenvalues in the Generalized Schur form no */
+/* >                      longer satisfy SELCTG=.TRUE.  This could also */
+/* >                      be caused due to scaling. */
+/* >                =N+3: reordering failed in STGSEN. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgges_(char *jobvsl, char *jobvsr, char *sort, L_fp 
+	selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, 
+	integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, 
+	integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork,
+	 logical *bwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, 
+	    vsr_dim1, vsr_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    real anrm, bnrm;
+    integer idum[1], ierr, itau, iwrk;
+    real pvsl, pvsr;
+    integer i__;
+    extern logical lsame_(char *, char *);
+    integer ileft, icols;
+    logical cursl, ilvsl, ilvsr;
+    integer irows;
+    logical lst2sl;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer ip;
+    extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, integer *
+	    ), sggbal_(char *, integer *, real *, integer *, 
+	    real *, integer *, integer *, integer *, real *, real *, real *, 
+	    integer *);
+    logical ilascl, ilbscl;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    real safmin;
+    extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , real *, integer *, integer *);
+    real safmax;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ijobvl, iright;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    integer ijobvr;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), slaset_(char *, integer *, 
+	    integer *, real *, real *, real *, integer *);
+    real anrmto, bnrmto;
+    logical lastsl;
+    extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , real *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *), stgsen_(integer *, 
+	    logical *, logical *, logical *, integer *, real *, integer *, 
+	    real *, integer *, real *, real *, real *, real *, integer *, 
+	    real *, integer *, integer *, real *, real *, real *, real *, 
+	    integer *, integer *, integer *, integer *);
+    integer minwrk, maxwrk;
+    real smlnum;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    logical wantst, lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    real dif[2];
+    integer ihi, ilo;
+    real eps;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --alphar;
+    --alphai;
+    --beta;
+    vsl_dim1 = *ldvsl;
+    vsl_offset = 1 + vsl_dim1 * 1;
+    vsl -= vsl_offset;
+    vsr_dim1 = *ldvsr;
+    vsr_offset = 1 + vsr_dim1 * 1;
+    vsr -= vsr_offset;
+    --work;
+    --bwork;
+
+    /* Function Body */
+    if (lsame_(jobvsl, "N")) {
+	ijobvl = 1;
+	ilvsl = FALSE_;
+    } else if (lsame_(jobvsl, "V")) {
+	ijobvl = 2;
+	ilvsl = TRUE_;
+    } else {
+	ijobvl = -1;
+	ilvsl = FALSE_;
+    }
+
+    if (lsame_(jobvsr, "N")) {
+	ijobvr = 1;
+	ilvsr = FALSE_;
+    } else if (lsame_(jobvsr, "V")) {
+	ijobvr = 2;
+	ilvsr = TRUE_;
+    } else {
+	ijobvr = -1;
+	ilvsr = FALSE_;
+    }
+
+    wantst = lsame_(sort, "S");
+
+/*     Test the input arguments */
+
+    *info = 0;
+    lquery = *lwork == -1;
+    if (ijobvl <= 0) {
+	*info = -1;
+    } else if (ijobvr <= 0) {
+	*info = -2;
+    } else if (! wantst && ! lsame_(sort, "N")) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -9;
+    } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) {
+	*info = -15;
+    } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) {
+	*info = -17;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	if (*n > 0) {
+/* Computing MAX */
+	    i__1 = *n << 3, i__2 = *n * 6 + 16;
+	    minwrk = f2cmax(i__1,i__2);
+	    maxwrk = minwrk - *n + *n * ilaenv_(&c__1, "SGEQRF", " ", n, &
+		    c__1, n, &c__0, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SORMQR", 
+		    " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = f2cmax(i__1,i__2);
+	    if (ilvsl) {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SOR"
+			"GQR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	} else {
+	    minwrk = 1;
+	    maxwrk = 1;
+	}
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -19;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGES ", &i__1, (ftnlen)5);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	*sdim = 0;
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    safmin = slamch_("S");
+    safmax = 1.f / safmin;
+    slabad_(&safmin, &safmax);
+    smlnum = sqrt(safmin) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]);
+    ilascl = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	anrmto = smlnum;
+	ilascl = TRUE_;
+    } else if (anrm > bignum) {
+	anrmto = bignum;
+	ilascl = TRUE_;
+    }
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]);
+    ilbscl = FALSE_;
+    if (bnrm > 0.f && bnrm < smlnum) {
+	bnrmto = smlnum;
+	ilbscl = TRUE_;
+    } else if (bnrm > bignum) {
+	bnrmto = bignum;
+	ilbscl = TRUE_;
+    }
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
+		ierr);
+    }
+
+/*     Permute the matrix to make it more nearly triangular */
+/*     (Workspace: need 6*N + 2*N space for storing balancing factors) */
+
+    ileft = 1;
+    iright = *n + 1;
+    iwrk = iright + *n;
+    sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[
+	    ileft], &work[iright], &work[iwrk], &ierr);
+
+/*     Reduce B to triangular form (QR decomposition of B) */
+/*     (Workspace: need N, prefer N*NB) */
+
+    irows = ihi + 1 - ilo;
+    icols = *n + 1 - ilo;
+    itau = iwrk;
+    iwrk = itau + irows;
+    i__1 = *lwork + 1 - iwrk;
+    sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[
+	    iwrk], &i__1, &ierr);
+
+/*     Apply the orthogonal transformation to matrix A */
+/*     (Workspace: need N, prefer N*NB) */
+
+    i__1 = *lwork + 1 - iwrk;
+    sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, &
+	    work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, &
+	    ierr);
+
+/*     Initialize VSL */
+/*     (Workspace: need N, prefer N*NB) */
+
+    if (ilvsl) {
+	slaset_("Full", n, n, &c_b38, &c_b39, &vsl[vsl_offset], ldvsl);
+	if (irows > 1) {
+	    i__1 = irows - 1;
+	    i__2 = irows - 1;
+	    slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[
+		    ilo + 1 + ilo * vsl_dim1], ldvsl);
+	}
+	i__1 = *lwork + 1 - iwrk;
+	sorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, &
+		work[itau], &work[iwrk], &i__1, &ierr);
+    }
+
+/*     Initialize VSR */
+
+    if (ilvsr) {
+	slaset_("Full", n, n, &c_b38, &c_b39, &vsr[vsr_offset], ldvsr);
+    }
+
+/*     Reduce to generalized Hessenberg form */
+/*     (Workspace: none needed) */
+
+    sgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
+	    ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr);
+
+/*     Perform QZ algorithm, computing Schur vectors if desired */
+/*     (Workspace: need N) */
+
+    iwrk = itau;
+    i__1 = *lwork + 1 - iwrk;
+    shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[
+	    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset]
+	    , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr);
+    if (ierr != 0) {
+	if (ierr > 0 && ierr <= *n) {
+	    *info = ierr;
+	} else if (ierr > *n && ierr <= *n << 1) {
+	    *info = ierr - *n;
+	} else {
+	    *info = *n + 1;
+	}
+	goto L40;
+    }
+
+/*     Sort eigenvalues ALPHA/BETA if desired */
+/*     (Workspace: need 4*N+16 ) */
+
+    *sdim = 0;
+    if (wantst) {
+
+/*        Undo scaling on eigenvalues before SELCTGing */
+
+	if (ilascl) {
+	    slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], 
+		    n, &ierr);
+	    slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], 
+		    n, &ierr);
+	}
+	if (ilbscl) {
+	    slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, 
+		    &ierr);
+	}
+
+/*        Select eigenvalues */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]);
+/* L10: */
+	}
+
+	i__1 = *lwork - iwrk + 1;
+	stgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[
+		b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[
+		vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, &
+		pvsr, dif, &work[iwrk], &i__1, idum, &c__1, &ierr);
+	if (ierr == 1) {
+	    *info = *n + 3;
+	}
+
+    }
+
+/*     Apply back-permutation to VSL and VSR */
+/*     (Workspace: none needed) */
+
+    if (ilvsl) {
+	sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[
+		vsl_offset], ldvsl, &ierr);
+    }
+
+    if (ilvsr) {
+	sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[
+		vsr_offset], ldvsr, &ierr);
+    }
+
+/*     Check if unscaling would cause over/underflow, if so, rescale */
+/*     (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */
+/*     B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */
+
+    if (ilascl) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (alphai[i__] != 0.f) {
+		if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[
+			i__] > anrm / anrmto) {
+		    work[1] = (r__1 = a[i__ + i__ * a_dim1] / alphar[i__], 
+			    abs(r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		} else if (alphai[i__] / safmax > anrmto / anrm || safmin / 
+			alphai[i__] > anrm / anrmto) {
+		    work[1] = (r__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[
+			    i__], abs(r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		}
+	    }
+/* L50: */
+	}
+    }
+
+    if (ilbscl) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (alphai[i__] != 0.f) {
+		if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] 
+			> bnrm / bnrmto) {
+		    work[1] = (r__1 = b[i__ + i__ * b_dim1] / beta[i__], abs(
+			    r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		}
+	    }
+/* L60: */
+	}
+    }
+
+/*     Undo scaling */
+
+    if (ilascl) {
+	slascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
+		ierr);
+    }
+
+    if (ilbscl) {
+	slascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
+		ierr);
+    }
+
+    if (wantst) {
+
+/*        Check if reordering is correct */
+
+	lastsl = TRUE_;
+	lst2sl = TRUE_;
+	*sdim = 0;
+	ip = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]);
+	    if (alphai[i__] == 0.f) {
+		if (cursl) {
+		    ++(*sdim);
+		}
+		ip = 0;
+		if (cursl && ! lastsl) {
+		    *info = *n + 2;
+		}
+	    } else {
+		if (ip == 1) {
+
+/*                 Last eigenvalue of conjugate pair */
+
+		    cursl = cursl || lastsl;
+		    lastsl = cursl;
+		    if (cursl) {
+			*sdim += 2;
+		    }
+		    ip = -1;
+		    if (cursl && ! lst2sl) {
+			*info = *n + 2;
+		    }
+		} else {
+
+/*                 First eigenvalue of conjugate pair */
+
+		    ip = 1;
+		}
+	    }
+	    lst2sl = lastsl;
+	    lastsl = cursl;
+/* L30: */
+	}
+
+    }
+
+L40:
+
+    work[1] = (real) maxwrk;
+
+    return 0;
+
+/*     End of SGGES */
+
+} /* sgges_ */
+
diff --git a/lapack-netlib/SRC/sgges3.c b/lapack-netlib/SRC/sgges3.c
new file mode 100644
index 000000000..518de890a
--- /dev/null
+++ b/lapack-netlib/SRC/sgges3.c
@@ -0,0 +1,1163 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static integer c__0 = 0;
+static real c_b36 = 0.f;
+static real c_b37 = 1.f;
+
+/* > \brief <b> SGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors 
+for GE matrices (blocked algorithm)</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGES3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgges3.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgges3.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgges3.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, */
+/*      $                   LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, */
+/*      $                   VSR, LDVSR, WORK, LWORK, BWORK, INFO ) */
+
+/*       CHARACTER          JOBVSL, JOBVSR, SORT */
+/*       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */
+/*       LOGICAL            BWORK( * ) */
+/*       REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */
+/*      $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ), */
+/*      $                   VSR( LDVSR, * ), WORK( * ) */
+/*       LOGICAL            SELCTG */
+/*       EXTERNAL           SELCTG */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGES3 computes for a pair of N-by-N real nonsymmetric matrices (A,B), */
+/* > the generalized eigenvalues, the generalized real Schur form (S,T), */
+/* > optionally, the left and/or right matrices of Schur vectors (VSL and */
+/* > VSR). This gives the generalized Schur factorization */
+/* > */
+/* >          (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) */
+/* > */
+/* > Optionally, it also orders the eigenvalues so that a selected cluster */
+/* > of eigenvalues appears in the leading diagonal blocks of the upper */
+/* > quasi-triangular matrix S and the upper triangular matrix T.The */
+/* > leading columns of VSL and VSR then form an orthonormal basis for the */
+/* > corresponding left and right eigenspaces (deflating subspaces). */
+/* > */
+/* > (If only the generalized eigenvalues are needed, use the driver */
+/* > SGGEV instead, which is faster.) */
+/* > */
+/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */
+/* > or a ratio alpha/beta = w, such that  A - w*B is singular.  It is */
+/* > usually represented as the pair (alpha,beta), as there is a */
+/* > reasonable interpretation for beta=0 or both being zero. */
+/* > */
+/* > A pair of matrices (S,T) is in generalized real Schur form if T is */
+/* > upper triangular with non-negative diagonal and S is block upper */
+/* > triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond */
+/* > to real generalized eigenvalues, while 2-by-2 blocks of S will be */
+/* > "standardized" by making the corresponding elements of T have the */
+/* > form: */
+/* >         [  a  0  ] */
+/* >         [  0  b  ] */
+/* > */
+/* > and the pair of corresponding 2-by-2 blocks in S and T will have a */
+/* > complex conjugate pair of generalized eigenvalues. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVSL */
+/* > \verbatim */
+/* >          JOBVSL is CHARACTER*1 */
+/* >          = 'N':  do not compute the left Schur vectors; */
+/* >          = 'V':  compute the left Schur vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVSR */
+/* > \verbatim */
+/* >          JOBVSR is CHARACTER*1 */
+/* >          = 'N':  do not compute the right Schur vectors; */
+/* >          = 'V':  compute the right Schur vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SORT */
+/* > \verbatim */
+/* >          SORT is CHARACTER*1 */
+/* >          Specifies whether or not to order the eigenvalues on the */
+/* >          diagonal of the generalized Schur form. */
+/* >          = 'N':  Eigenvalues are not ordered; */
+/* >          = 'S':  Eigenvalues are ordered (see SELCTG); */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SELCTG */
+/* > \verbatim */
+/* >          SELCTG is a LOGICAL FUNCTION of three REAL arguments */
+/* >          SELCTG must be declared EXTERNAL in the calling subroutine. */
+/* >          If SORT = 'N', SELCTG is not referenced. */
+/* >          If SORT = 'S', SELCTG is used to select eigenvalues to sort */
+/* >          to the top left of the Schur form. */
+/* >          An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */
+/* >          SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */
+/* >          one of a complex conjugate pair of eigenvalues is selected, */
+/* >          then both complex eigenvalues are selected. */
+/* > */
+/* >          Note that in the ill-conditioned case, a selected complex */
+/* >          eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), */
+/* >          BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 */
+/* >          in this case. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A, B, VSL, and VSR.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the first of the pair of matrices. */
+/* >          On exit, A has been overwritten by its generalized Schur */
+/* >          form S. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the second of the pair of matrices. */
+/* >          On exit, B has been overwritten by its generalized Schur */
+/* >          form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SDIM */
+/* > \verbatim */
+/* >          SDIM is INTEGER */
+/* >          If SORT = 'N', SDIM = 0. */
+/* >          If SORT = 'S', SDIM = number of eigenvalues (after sorting) */
+/* >          for which SELCTG is true.  (Complex conjugate pairs for which */
+/* >          SELCTG is true for either eigenvalue count as 2.) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAR */
+/* > \verbatim */
+/* >          ALPHAR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAI */
+/* > \verbatim */
+/* >          ALPHAI is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* >          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */
+/* >          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i, */
+/* >          and  BETA(j),j=1,...,N are the diagonals of the complex Schur */
+/* >          form (S,T) that would result if the 2-by-2 diagonal blocks of */
+/* >          the real Schur form of (A,B) were further reduced to */
+/* >          triangular form using 2-by-2 complex unitary transformations. */
+/* >          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */
+/* >          positive, then the j-th and (j+1)-st eigenvalues are a */
+/* >          complex conjugate pair, with ALPHAI(j+1) negative. */
+/* > */
+/* >          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */
+/* >          may easily over- or underflow, and BETA(j) may even be zero. */
+/* >          Thus, the user should avoid naively computing the ratio. */
+/* >          However, ALPHAR and ALPHAI will be always less than and */
+/* >          usually comparable with norm(A) in magnitude, and BETA always */
+/* >          less than and usually comparable with norm(B). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VSL */
+/* > \verbatim */
+/* >          VSL is REAL array, dimension (LDVSL,N) */
+/* >          If JOBVSL = 'V', VSL will contain the left Schur vectors. */
+/* >          Not referenced if JOBVSL = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVSL */
+/* > \verbatim */
+/* >          LDVSL is INTEGER */
+/* >          The leading dimension of the matrix VSL. LDVSL >=1, and */
+/* >          if JOBVSL = 'V', LDVSL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VSR */
+/* > \verbatim */
+/* >          VSR is REAL array, dimension (LDVSR,N) */
+/* >          If JOBVSR = 'V', VSR will contain the right Schur vectors. */
+/* >          Not referenced if JOBVSR = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVSR */
+/* > \verbatim */
+/* >          LDVSR is INTEGER */
+/* >          The leading dimension of the matrix VSR. LDVSR >= 1, and */
+/* >          if JOBVSR = 'V', LDVSR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          Not referenced if SORT = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1,...,N: */
+/* >                The QZ iteration failed.  (A,B) are not in Schur */
+/* >                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */
+/* >                be correct for j=INFO+1,...,N. */
+/* >          > N:  =N+1: other than QZ iteration failed in SHGEQZ. */
+/* >                =N+2: after reordering, roundoff changed values of */
+/* >                      some complex eigenvalues so that leading */
+/* >                      eigenvalues in the Generalized Schur form no */
+/* >                      longer satisfy SELCTG=.TRUE.  This could also */
+/* >                      be caused due to scaling. */
+/* >                =N+3: reordering failed in STGSEN. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date January 2015 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgges3_(char *jobvsl, char *jobvsr, char *sort, L_fp 
+	selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, 
+	integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, 
+	integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork,
+	 logical *bwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, 
+	    vsr_dim1, vsr_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    real anrm, bnrm;
+    integer idum[1], ierr, itau, iwrk;
+    real pvsl, pvsr;
+    integer i__;
+    extern logical lsame_(char *, char *);
+    integer ileft, icols;
+    logical cursl, ilvsl, ilvsr;
+    integer irows;
+    extern /* Subroutine */ int sgghd3_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, integer *)
+	    ;
+    logical lst2sl;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer ip;
+    extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, integer *
+	    ), sggbal_(char *, integer *, real *, integer *, 
+	    real *, integer *, integer *, integer *, real *, real *, real *, 
+	    integer *);
+    logical ilascl, ilbscl;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real safmax, bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    integer ijobvl, iright;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    integer ijobvr;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), slaset_(char *, integer *, 
+	    integer *, real *, real *, real *, integer *);
+    real anrmto, bnrmto;
+    logical lastsl;
+    extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , real *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *), stgsen_(integer *, 
+	    logical *, logical *, logical *, integer *, real *, integer *, 
+	    real *, integer *, real *, real *, real *, real *, integer *, 
+	    real *, integer *, integer *, real *, real *, real *, real *, 
+	    integer *, integer *, integer *, integer *);
+    real smlnum;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    logical wantst, lquery;
+    integer lwkopt;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    real dif[2];
+    integer ihi, ilo;
+    real eps;
+
+
+/*  -- LAPACK driver routine (version 3.6.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     January 2015 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --alphar;
+    --alphai;
+    --beta;
+    vsl_dim1 = *ldvsl;
+    vsl_offset = 1 + vsl_dim1 * 1;
+    vsl -= vsl_offset;
+    vsr_dim1 = *ldvsr;
+    vsr_offset = 1 + vsr_dim1 * 1;
+    vsr -= vsr_offset;
+    --work;
+    --bwork;
+
+    /* Function Body */
+    if (lsame_(jobvsl, "N")) {
+	ijobvl = 1;
+	ilvsl = FALSE_;
+    } else if (lsame_(jobvsl, "V")) {
+	ijobvl = 2;
+	ilvsl = TRUE_;
+    } else {
+	ijobvl = -1;
+	ilvsl = FALSE_;
+    }
+
+    if (lsame_(jobvsr, "N")) {
+	ijobvr = 1;
+	ilvsr = FALSE_;
+    } else if (lsame_(jobvsr, "V")) {
+	ijobvr = 2;
+	ilvsr = TRUE_;
+    } else {
+	ijobvr = -1;
+	ilvsr = FALSE_;
+    }
+
+    wantst = lsame_(sort, "S");
+
+/*     Test the input arguments */
+
+    *info = 0;
+    lquery = *lwork == -1;
+    if (ijobvl <= 0) {
+	*info = -1;
+    } else if (ijobvr <= 0) {
+	*info = -2;
+    } else if (! wantst && ! lsame_(sort, "N")) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -9;
+    } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) {
+	*info = -15;
+    } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) {
+	*info = -17;
+    } else if (*lwork < *n * 6 + 16 && ! lquery) {
+	*info = -19;
+    }
+
+/*     Compute workspace */
+
+    if (*info == 0) {
+	sgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr);
+/* Computing MAX */
+	i__1 = *n * 6 + 16, i__2 = *n * 3 + (integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	sormqr_("L", "T", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], 
+		lda, &work[1], &c_n1, &ierr);
+/* Computing MAX */
+	i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	if (ilvsl) {
+	    sorgqr_(n, n, n, &vsl[vsl_offset], ldvsl, &work[1], &work[1], &
+		    c_n1, &ierr);
+/* Computing MAX */
+	    i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1];
+	    lwkopt = f2cmax(i__1,i__2);
+	}
+	sgghd3_(jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[b_offset], 
+		ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[
+		1], &c_n1, &ierr);
+/* Computing MAX */
+	i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	shgeqz_("S", jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[
+		b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[
+		vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[1], &c_n1, 
+		&ierr);
+/* Computing MAX */
+	i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	if (wantst) {
+	    stgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &
+		    b[b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[
+		    vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, 
+		    &pvsr, dif, &work[1], &c_n1, idum, &c__1, &ierr);
+/* Computing MAX */
+	    i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1];
+	    lwkopt = f2cmax(i__1,i__2);
+	}
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGES3 ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	*sdim = 0;
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    safmin = slamch_("S");
+    safmax = 1.f / safmin;
+    slabad_(&safmin, &safmax);
+    smlnum = sqrt(safmin) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]);
+    ilascl = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	anrmto = smlnum;
+	ilascl = TRUE_;
+    } else if (anrm > bignum) {
+	anrmto = bignum;
+	ilascl = TRUE_;
+    }
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]);
+    ilbscl = FALSE_;
+    if (bnrm > 0.f && bnrm < smlnum) {
+	bnrmto = smlnum;
+	ilbscl = TRUE_;
+    } else if (bnrm > bignum) {
+	bnrmto = bignum;
+	ilbscl = TRUE_;
+    }
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
+		ierr);
+    }
+
+/*     Permute the matrix to make it more nearly triangular */
+
+    ileft = 1;
+    iright = *n + 1;
+    iwrk = iright + *n;
+    sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[
+	    ileft], &work[iright], &work[iwrk], &ierr);
+
+/*     Reduce B to triangular form (QR decomposition of B) */
+
+    irows = ihi + 1 - ilo;
+    icols = *n + 1 - ilo;
+    itau = iwrk;
+    iwrk = itau + irows;
+    i__1 = *lwork + 1 - iwrk;
+    sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[
+	    iwrk], &i__1, &ierr);
+
+/*     Apply the orthogonal transformation to matrix A */
+
+    i__1 = *lwork + 1 - iwrk;
+    sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, &
+	    work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, &
+	    ierr);
+
+/*     Initialize VSL */
+
+    if (ilvsl) {
+	slaset_("Full", n, n, &c_b36, &c_b37, &vsl[vsl_offset], ldvsl);
+	if (irows > 1) {
+	    i__1 = irows - 1;
+	    i__2 = irows - 1;
+	    slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[
+		    ilo + 1 + ilo * vsl_dim1], ldvsl);
+	}
+	i__1 = *lwork + 1 - iwrk;
+	sorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, &
+		work[itau], &work[iwrk], &i__1, &ierr);
+    }
+
+/*     Initialize VSR */
+
+    if (ilvsr) {
+	slaset_("Full", n, n, &c_b36, &c_b37, &vsr[vsr_offset], ldvsr);
+    }
+
+/*     Reduce to generalized Hessenberg form */
+
+    i__1 = *lwork + 1 - iwrk;
+    sgghd3_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
+	    ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk]
+	    , &i__1, &ierr);
+
+/*     Perform QZ algorithm, computing Schur vectors if desired */
+
+    iwrk = itau;
+    i__1 = *lwork + 1 - iwrk;
+    shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[
+	    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset]
+	    , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr);
+    if (ierr != 0) {
+	if (ierr > 0 && ierr <= *n) {
+	    *info = ierr;
+	} else if (ierr > *n && ierr <= *n << 1) {
+	    *info = ierr - *n;
+	} else {
+	    *info = *n + 1;
+	}
+	goto L40;
+    }
+
+/*     Sort eigenvalues ALPHA/BETA if desired */
+
+    *sdim = 0;
+    if (wantst) {
+
+/*        Undo scaling on eigenvalues before SELCTGing */
+
+	if (ilascl) {
+	    slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], 
+		    n, &ierr);
+	    slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], 
+		    n, &ierr);
+	}
+	if (ilbscl) {
+	    slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, 
+		    &ierr);
+	}
+
+/*        Select eigenvalues */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]);
+/* L10: */
+	}
+
+	i__1 = *lwork - iwrk + 1;
+	stgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[
+		b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[
+		vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, &
+		pvsr, dif, &work[iwrk], &i__1, idum, &c__1, &ierr);
+	if (ierr == 1) {
+	    *info = *n + 3;
+	}
+
+    }
+
+/*     Apply back-permutation to VSL and VSR */
+
+    if (ilvsl) {
+	sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[
+		vsl_offset], ldvsl, &ierr);
+    }
+
+    if (ilvsr) {
+	sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[
+		vsr_offset], ldvsr, &ierr);
+    }
+
+/*     Check if unscaling would cause over/underflow, if so, rescale */
+/*     (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */
+/*     B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */
+
+    if (ilascl) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (alphai[i__] != 0.f) {
+		if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[
+			i__] > anrm / anrmto) {
+		    work[1] = (r__1 = a[i__ + i__ * a_dim1] / alphar[i__], 
+			    abs(r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		} else if (alphai[i__] / safmax > anrmto / anrm || safmin / 
+			alphai[i__] > anrm / anrmto) {
+		    work[1] = (r__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[
+			    i__], abs(r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		}
+	    }
+/* L50: */
+	}
+    }
+
+    if (ilbscl) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (alphai[i__] != 0.f) {
+		if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] 
+			> bnrm / bnrmto) {
+		    work[1] = (r__1 = b[i__ + i__ * b_dim1] / beta[i__], abs(
+			    r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		}
+	    }
+/* L60: */
+	}
+    }
+
+/*     Undo scaling */
+
+    if (ilascl) {
+	slascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
+		ierr);
+    }
+
+    if (ilbscl) {
+	slascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
+		ierr);
+    }
+
+    if (wantst) {
+
+/*        Check if reordering is correct */
+
+	lastsl = TRUE_;
+	lst2sl = TRUE_;
+	*sdim = 0;
+	ip = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]);
+	    if (alphai[i__] == 0.f) {
+		if (cursl) {
+		    ++(*sdim);
+		}
+		ip = 0;
+		if (cursl && ! lastsl) {
+		    *info = *n + 2;
+		}
+	    } else {
+		if (ip == 1) {
+
+/*                 Last eigenvalue of conjugate pair */
+
+		    cursl = cursl || lastsl;
+		    lastsl = cursl;
+		    if (cursl) {
+			*sdim += 2;
+		    }
+		    ip = -1;
+		    if (cursl && ! lst2sl) {
+			*info = *n + 2;
+		    }
+		} else {
+
+/*                 First eigenvalue of conjugate pair */
+
+		    ip = 1;
+		}
+	    }
+	    lst2sl = lastsl;
+	    lastsl = cursl;
+/* L30: */
+	}
+
+    }
+
+L40:
+
+    work[1] = (real) lwkopt;
+
+    return 0;
+
+/*     End of SGGES3 */
+
+} /* sgges3_ */
+
diff --git a/lapack-netlib/SRC/sggesx.c b/lapack-netlib/SRC/sggesx.c
new file mode 100644
index 000000000..3703b9e44
--- /dev/null
+++ b/lapack-netlib/SRC/sggesx.c
@@ -0,0 +1,1308 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+static real c_b42 = 0.f;
+static real c_b43 = 1.f;
+
+/* > \brief <b> SGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors 
+for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGESX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggesx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggesx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggesx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, */
+/*                          B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, */
+/*                          VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, */
+/*                          LIWORK, BWORK, INFO ) */
+
+/*       CHARACTER          JOBVSL, JOBVSR, SENSE, SORT */
+/*       INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, */
+/*      $                   SDIM */
+/*       LOGICAL            BWORK( * ) */
+/*       INTEGER            IWORK( * ) */
+/*       REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */
+/*      $                   B( LDB, * ), BETA( * ), RCONDE( 2 ), */
+/*      $                   RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), */
+/*      $                   WORK( * ) */
+/*       LOGICAL            SELCTG */
+/*       EXTERNAL           SELCTG */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGESX computes for a pair of N-by-N real nonsymmetric matrices */
+/* > (A,B), the generalized eigenvalues, the real Schur form (S,T), and, */
+/* > optionally, the left and/or right matrices of Schur vectors (VSL and */
+/* > VSR).  This gives the generalized Schur factorization */
+/* > */
+/* >      (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) */
+/* > */
+/* > Optionally, it also orders the eigenvalues so that a selected cluster */
+/* > of eigenvalues appears in the leading diagonal blocks of the upper */
+/* > quasi-triangular matrix S and the upper triangular matrix T; computes */
+/* > a reciprocal condition number for the average of the selected */
+/* > eigenvalues (RCONDE); and computes a reciprocal condition number for */
+/* > the right and left deflating subspaces corresponding to the selected */
+/* > eigenvalues (RCONDV). The leading columns of VSL and VSR then form */
+/* > an orthonormal basis for the corresponding left and right eigenspaces */
+/* > (deflating subspaces). */
+/* > */
+/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */
+/* > or a ratio alpha/beta = w, such that  A - w*B is singular.  It is */
+/* > usually represented as the pair (alpha,beta), as there is a */
+/* > reasonable interpretation for beta=0 or for both being zero. */
+/* > */
+/* > A pair of matrices (S,T) is in generalized real Schur form if T is */
+/* > upper triangular with non-negative diagonal and S is block upper */
+/* > triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond */
+/* > to real generalized eigenvalues, while 2-by-2 blocks of S will be */
+/* > "standardized" by making the corresponding elements of T have the */
+/* > form: */
+/* >         [  a  0  ] */
+/* >         [  0  b  ] */
+/* > */
+/* > and the pair of corresponding 2-by-2 blocks in S and T will have a */
+/* > complex conjugate pair of generalized eigenvalues. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVSL */
+/* > \verbatim */
+/* >          JOBVSL is CHARACTER*1 */
+/* >          = 'N':  do not compute the left Schur vectors; */
+/* >          = 'V':  compute the left Schur vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVSR */
+/* > \verbatim */
+/* >          JOBVSR is CHARACTER*1 */
+/* >          = 'N':  do not compute the right Schur vectors; */
+/* >          = 'V':  compute the right Schur vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SORT */
+/* > \verbatim */
+/* >          SORT is CHARACTER*1 */
+/* >          Specifies whether or not to order the eigenvalues on the */
+/* >          diagonal of the generalized Schur form. */
+/* >          = 'N':  Eigenvalues are not ordered; */
+/* >          = 'S':  Eigenvalues are ordered (see SELCTG). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SELCTG */
+/* > \verbatim */
+/* >          SELCTG is a LOGICAL FUNCTION of three REAL arguments */
+/* >          SELCTG must be declared EXTERNAL in the calling subroutine. */
+/* >          If SORT = 'N', SELCTG is not referenced. */
+/* >          If SORT = 'S', SELCTG is used to select eigenvalues to sort */
+/* >          to the top left of the Schur form. */
+/* >          An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */
+/* >          SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */
+/* >          one of a complex conjugate pair of eigenvalues is selected, */
+/* >          then both complex eigenvalues are selected. */
+/* >          Note that a selected complex eigenvalue may no longer satisfy */
+/* >          SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, */
+/* >          since ordering may change the value of complex eigenvalues */
+/* >          (especially if the eigenvalue is ill-conditioned), in this */
+/* >          case INFO is set to N+3. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SENSE */
+/* > \verbatim */
+/* >          SENSE is CHARACTER*1 */
+/* >          Determines which reciprocal condition numbers are computed. */
+/* >          = 'N':  None are computed; */
+/* >          = 'E':  Computed for average of selected eigenvalues only; */
+/* >          = 'V':  Computed for selected deflating subspaces only; */
+/* >          = 'B':  Computed for both. */
+/* >          If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A, B, VSL, and VSR.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the first of the pair of matrices. */
+/* >          On exit, A has been overwritten by its generalized Schur */
+/* >          form S. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the second of the pair of matrices. */
+/* >          On exit, B has been overwritten by its generalized Schur */
+/* >          form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SDIM */
+/* > \verbatim */
+/* >          SDIM is INTEGER */
+/* >          If SORT = 'N', SDIM = 0. */
+/* >          If SORT = 'S', SDIM = number of eigenvalues (after sorting) */
+/* >          for which SELCTG is true.  (Complex conjugate pairs for which */
+/* >          SELCTG is true for either eigenvalue count as 2.) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAR */
+/* > \verbatim */
+/* >          ALPHAR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAI */
+/* > \verbatim */
+/* >          ALPHAI is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* >          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */
+/* >          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i */
+/* >          and BETA(j),j=1,...,N  are the diagonals of the complex Schur */
+/* >          form (S,T) that would result if the 2-by-2 diagonal blocks of */
+/* >          the real Schur form of (A,B) were further reduced to */
+/* >          triangular form using 2-by-2 complex unitary transformations. */
+/* >          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */
+/* >          positive, then the j-th and (j+1)-st eigenvalues are a */
+/* >          complex conjugate pair, with ALPHAI(j+1) negative. */
+/* > */
+/* >          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */
+/* >          may easily over- or underflow, and BETA(j) may even be zero. */
+/* >          Thus, the user should avoid naively computing the ratio. */
+/* >          However, ALPHAR and ALPHAI will be always less than and */
+/* >          usually comparable with norm(A) in magnitude, and BETA always */
+/* >          less than and usually comparable with norm(B). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VSL */
+/* > \verbatim */
+/* >          VSL is REAL array, dimension (LDVSL,N) */
+/* >          If JOBVSL = 'V', VSL will contain the left Schur vectors. */
+/* >          Not referenced if JOBVSL = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVSL */
+/* > \verbatim */
+/* >          LDVSL is INTEGER */
+/* >          The leading dimension of the matrix VSL. LDVSL >=1, and */
+/* >          if JOBVSL = 'V', LDVSL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VSR */
+/* > \verbatim */
+/* >          VSR is REAL array, dimension (LDVSR,N) */
+/* >          If JOBVSR = 'V', VSR will contain the right Schur vectors. */
+/* >          Not referenced if JOBVSR = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVSR */
+/* > \verbatim */
+/* >          LDVSR is INTEGER */
+/* >          The leading dimension of the matrix VSR. LDVSR >= 1, and */
+/* >          if JOBVSR = 'V', LDVSR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDE */
+/* > \verbatim */
+/* >          RCONDE is REAL array, dimension ( 2 ) */
+/* >          If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the */
+/* >          reciprocal condition numbers for the average of the selected */
+/* >          eigenvalues. */
+/* >          Not referenced if SENSE = 'N' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDV */
+/* > \verbatim */
+/* >          RCONDV is REAL array, dimension ( 2 ) */
+/* >          If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the */
+/* >          reciprocal condition numbers for the selected deflating */
+/* >          subspaces. */
+/* >          Not referenced if SENSE = 'N' or 'E'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', */
+/* >          LWORK >= f2cmax( 8*N, 6*N+16, 2*SDIM*(N-SDIM) ), else */
+/* >          LWORK >= f2cmax( 8*N, 6*N+16 ). */
+/* >          Note that 2*SDIM*(N-SDIM) <= N*N/2. */
+/* >          Note also that an error is only returned if */
+/* >          LWORK < f2cmax( 8*N, 6*N+16), but if SENSE = 'E' or 'V' or 'B' */
+/* >          this may not be large enough. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the bound on the optimal size of the WORK */
+/* >          array and the minimum size of the IWORK array, returns these */
+/* >          values as the first entries of the WORK and IWORK arrays, and */
+/* >          no error message related to LWORK or LIWORK is issued by */
+/* >          XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
+/* >          On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LIWORK */
+/* > \verbatim */
+/* >          LIWORK is INTEGER */
+/* >          The dimension of the array IWORK. */
+/* >          If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise */
+/* >          LIWORK >= N+6. */
+/* > */
+/* >          If LIWORK = -1, then a workspace query is assumed; the */
+/* >          routine only calculates the bound on the optimal size of the */
+/* >          WORK array and the minimum size of the IWORK array, returns */
+/* >          these values as the first entries of the WORK and IWORK */
+/* >          arrays, and no error message related to LWORK or LIWORK is */
+/* >          issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          Not referenced if SORT = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1,...,N: */
+/* >                The QZ iteration failed.  (A,B) are not in Schur */
+/* >                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */
+/* >                be correct for j=INFO+1,...,N. */
+/* >          > N:  =N+1: other than QZ iteration failed in SHGEQZ */
+/* >                =N+2: after reordering, roundoff changed values of */
+/* >                      some complex eigenvalues so that leading */
+/* >                      eigenvalues in the Generalized Schur form no */
+/* >                      longer satisfy SELCTG=.TRUE.  This could also */
+/* >                      be caused due to scaling. */
+/* >                =N+3: reordering failed in STGSEN. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGEeigen */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  An approximate (asymptotic) bound on the average absolute error of */
+/* >  the selected eigenvalues is */
+/* > */
+/* >       EPS * norm((A, B)) / RCONDE( 1 ). */
+/* > */
+/* >  An approximate (asymptotic) bound on the maximum angular error in */
+/* >  the computed deflating subspaces is */
+/* > */
+/* >       EPS * norm((A, B)) / RCONDV( 2 ). */
+/* > */
+/* >  See LAPACK User's Guide, section 4.11 for more information. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp 
+	selctg, char *sense, integer *n, real *a, integer *lda, real *b, 
+	integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, 
+	real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *rconde, 
+	real *rcondv, real *work, integer *lwork, integer *iwork, integer *
+	liwork, logical *bwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, 
+	    vsr_dim1, vsr_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    integer ijob;
+    real anrm, bnrm;
+    integer ierr, itau, iwrk, lwrk, i__;
+    extern logical lsame_(char *, char *);
+    integer ileft, icols;
+    logical cursl, ilvsl, ilvsr;
+    integer irows;
+    logical lst2sl;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer ip;
+    real pl;
+    extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, integer *
+	    ), sggbal_(char *, integer *, real *, integer *, 
+	    real *, integer *, integer *, integer *, real *, real *, real *, 
+	    integer *);
+    real pr;
+    logical ilascl, ilbscl;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    real safmin;
+    extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , real *, integer *, integer *);
+    real safmax;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ijobvl, iright;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    integer ijobvr;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *);
+    logical wantsb, wantse, lastsl;
+    integer liwmin;
+    real anrmto, bnrmto;
+    integer minwrk, maxwrk;
+    logical wantsn;
+    real smlnum;
+    extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , real *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *), slaset_(char *, 
+	    integer *, integer *, real *, real *, real *, integer *), 
+	    sorgqr_(integer *, integer *, integer *, real *, integer *, real *
+	    , real *, integer *, integer *), stgsen_(integer *, logical *, 
+	    logical *, logical *, integer *, real *, integer *, real *, 
+	    integer *, real *, real *, real *, real *, integer *, real *, 
+	    integer *, integer *, real *, real *, real *, real *, integer *, 
+	    integer *, integer *, integer *);
+    logical wantst, lquery, wantsv;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    real dif[2];
+    integer ihi, ilo;
+    real eps;
+
+
+/*  -- LAPACK driver routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --alphar;
+    --alphai;
+    --beta;
+    vsl_dim1 = *ldvsl;
+    vsl_offset = 1 + vsl_dim1 * 1;
+    vsl -= vsl_offset;
+    vsr_dim1 = *ldvsr;
+    vsr_offset = 1 + vsr_dim1 * 1;
+    vsr -= vsr_offset;
+    --rconde;
+    --rcondv;
+    --work;
+    --iwork;
+    --bwork;
+
+    /* Function Body */
+    if (lsame_(jobvsl, "N")) {
+	ijobvl = 1;
+	ilvsl = FALSE_;
+    } else if (lsame_(jobvsl, "V")) {
+	ijobvl = 2;
+	ilvsl = TRUE_;
+    } else {
+	ijobvl = -1;
+	ilvsl = FALSE_;
+    }
+
+    if (lsame_(jobvsr, "N")) {
+	ijobvr = 1;
+	ilvsr = FALSE_;
+    } else if (lsame_(jobvsr, "V")) {
+	ijobvr = 2;
+	ilvsr = TRUE_;
+    } else {
+	ijobvr = -1;
+	ilvsr = FALSE_;
+    }
+
+    wantst = lsame_(sort, "S");
+    wantsn = lsame_(sense, "N");
+    wantse = lsame_(sense, "E");
+    wantsv = lsame_(sense, "V");
+    wantsb = lsame_(sense, "B");
+    lquery = *lwork == -1 || *liwork == -1;
+    if (wantsn) {
+	ijob = 0;
+    } else if (wantse) {
+	ijob = 1;
+    } else if (wantsv) {
+	ijob = 2;
+    } else if (wantsb) {
+	ijob = 4;
+    }
+
+/*     Test the input arguments */
+
+    *info = 0;
+    if (ijobvl <= 0) {
+	*info = -1;
+    } else if (ijobvr <= 0) {
+	*info = -2;
+    } else if (! wantst && ! lsame_(sort, "N")) {
+	*info = -3;
+    } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! 
+	    wantsn) {
+	*info = -5;
+    } else if (*n < 0) {
+	*info = -6;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -8;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -10;
+    } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) {
+	*info = -16;
+    } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) {
+	*info = -18;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	if (*n > 0) {
+/* Computing MAX */
+	    i__1 = *n << 3, i__2 = *n * 6 + 16;
+	    minwrk = f2cmax(i__1,i__2);
+	    maxwrk = minwrk - *n + *n * ilaenv_(&c__1, "SGEQRF", " ", n, &
+		    c__1, n, &c__0, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SORMQR", 
+		    " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = f2cmax(i__1,i__2);
+	    if (ilvsl) {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SOR"
+			"GQR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    lwrk = maxwrk;
+	    if (ijob >= 1) {
+/* Computing MAX */
+		i__1 = lwrk, i__2 = *n * *n / 2;
+		lwrk = f2cmax(i__1,i__2);
+	    }
+	} else {
+	    minwrk = 1;
+	    maxwrk = 1;
+	    lwrk = 1;
+	}
+	work[1] = (real) lwrk;
+	if (wantsn || *n == 0) {
+	    liwmin = 1;
+	} else {
+	    liwmin = *n + 6;
+	}
+	iwork[1] = liwmin;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -22;
+	} else if (*liwork < liwmin && ! lquery) {
+	    *info = -24;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGESX", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	*sdim = 0;
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    safmin = slamch_("S");
+    safmax = 1.f / safmin;
+    slabad_(&safmin, &safmax);
+    smlnum = sqrt(safmin) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]);
+    ilascl = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	anrmto = smlnum;
+	ilascl = TRUE_;
+    } else if (anrm > bignum) {
+	anrmto = bignum;
+	ilascl = TRUE_;
+    }
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]);
+    ilbscl = FALSE_;
+    if (bnrm > 0.f && bnrm < smlnum) {
+	bnrmto = smlnum;
+	ilbscl = TRUE_;
+    } else if (bnrm > bignum) {
+	bnrmto = bignum;
+	ilbscl = TRUE_;
+    }
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
+		ierr);
+    }
+
+/*     Permute the matrix to make it more nearly triangular */
+/*     (Workspace: need 6*N + 2*N for permutation parameters) */
+
+    ileft = 1;
+    iright = *n + 1;
+    iwrk = iright + *n;
+    sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[
+	    ileft], &work[iright], &work[iwrk], &ierr);
+
+/*     Reduce B to triangular form (QR decomposition of B) */
+/*     (Workspace: need N, prefer N*NB) */
+
+    irows = ihi + 1 - ilo;
+    icols = *n + 1 - ilo;
+    itau = iwrk;
+    iwrk = itau + irows;
+    i__1 = *lwork + 1 - iwrk;
+    sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[
+	    iwrk], &i__1, &ierr);
+
+/*     Apply the orthogonal transformation to matrix A */
+/*     (Workspace: need N, prefer N*NB) */
+
+    i__1 = *lwork + 1 - iwrk;
+    sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, &
+	    work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, &
+	    ierr);
+
+/*     Initialize VSL */
+/*     (Workspace: need N, prefer N*NB) */
+
+    if (ilvsl) {
+	slaset_("Full", n, n, &c_b42, &c_b43, &vsl[vsl_offset], ldvsl);
+	if (irows > 1) {
+	    i__1 = irows - 1;
+	    i__2 = irows - 1;
+	    slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[
+		    ilo + 1 + ilo * vsl_dim1], ldvsl);
+	}
+	i__1 = *lwork + 1 - iwrk;
+	sorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, &
+		work[itau], &work[iwrk], &i__1, &ierr);
+    }
+
+/*     Initialize VSR */
+
+    if (ilvsr) {
+	slaset_("Full", n, n, &c_b42, &c_b43, &vsr[vsr_offset], ldvsr);
+    }
+
+/*     Reduce to generalized Hessenberg form */
+/*     (Workspace: none needed) */
+
+    sgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
+	    ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr);
+
+    *sdim = 0;
+
+/*     Perform QZ algorithm, computing Schur vectors if desired */
+/*     (Workspace: need N) */
+
+    iwrk = itau;
+    i__1 = *lwork + 1 - iwrk;
+    shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[
+	    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset]
+	    , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr);
+    if (ierr != 0) {
+	if (ierr > 0 && ierr <= *n) {
+	    *info = ierr;
+	} else if (ierr > *n && ierr <= *n << 1) {
+	    *info = ierr - *n;
+	} else {
+	    *info = *n + 1;
+	}
+	goto L50;
+    }
+
+/*     Sort eigenvalues ALPHA/BETA and compute the reciprocal of */
+/*     condition number(s) */
+/*     (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) */
+/*                 otherwise, need 8*(N+1) ) */
+
+    if (wantst) {
+
+/*        Undo scaling on eigenvalues before SELCTGing */
+
+	if (ilascl) {
+	    slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], 
+		    n, &ierr);
+	    slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], 
+		    n, &ierr);
+	}
+	if (ilbscl) {
+	    slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, 
+		    &ierr);
+	}
+
+/*        Select eigenvalues */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]);
+/* L10: */
+	}
+
+/*        Reorder eigenvalues, transform Generalized Schur vectors, and */
+/*        compute reciprocal condition numbers */
+
+	i__1 = *lwork - iwrk + 1;
+	stgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[
+		b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[
+		vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pl, &pr, 
+		dif, &work[iwrk], &i__1, &iwork[1], liwork, &ierr);
+
+	if (ijob >= 1) {
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim);
+	    maxwrk = f2cmax(i__1,i__2);
+	}
+	if (ierr == -22) {
+
+/*            not enough real workspace */
+
+	    *info = -22;
+	} else {
+	    if (ijob == 1 || ijob == 4) {
+		rconde[1] = pl;
+		rconde[2] = pr;
+	    }
+	    if (ijob == 2 || ijob == 4) {
+		rcondv[1] = dif[0];
+		rcondv[2] = dif[1];
+	    }
+	    if (ierr == 1) {
+		*info = *n + 3;
+	    }
+	}
+
+    }
+
+/*     Apply permutation to VSL and VSR */
+/*     (Workspace: none needed) */
+
+    if (ilvsl) {
+	sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[
+		vsl_offset], ldvsl, &ierr);
+    }
+
+    if (ilvsr) {
+	sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[
+		vsr_offset], ldvsr, &ierr);
+    }
+
+/*     Check if unscaling would cause over/underflow, if so, rescale */
+/*     (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */
+/*     B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */
+
+    if (ilascl) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (alphai[i__] != 0.f) {
+		if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[
+			i__] > anrm / anrmto) {
+		    work[1] = (r__1 = a[i__ + i__ * a_dim1] / alphar[i__], 
+			    abs(r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		} else if (alphai[i__] / safmax > anrmto / anrm || safmin / 
+			alphai[i__] > anrm / anrmto) {
+		    work[1] = (r__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[
+			    i__], abs(r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		}
+	    }
+/* L20: */
+	}
+    }
+
+    if (ilbscl) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (alphai[i__] != 0.f) {
+		if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] 
+			> bnrm / bnrmto) {
+		    work[1] = (r__1 = b[i__ + i__ * b_dim1] / beta[i__], abs(
+			    r__1));
+		    beta[i__] *= work[1];
+		    alphar[i__] *= work[1];
+		    alphai[i__] *= work[1];
+		}
+	    }
+/* L25: */
+	}
+    }
+
+/*     Undo scaling */
+
+    if (ilascl) {
+	slascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
+		ierr);
+    }
+
+    if (ilbscl) {
+	slascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
+		ierr);
+    }
+
+    if (wantst) {
+
+/*        Check if reordering is correct */
+
+	lastsl = TRUE_;
+	lst2sl = TRUE_;
+	*sdim = 0;
+	ip = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]);
+	    if (alphai[i__] == 0.f) {
+		if (cursl) {
+		    ++(*sdim);
+		}
+		ip = 0;
+		if (cursl && ! lastsl) {
+		    *info = *n + 2;
+		}
+	    } else {
+		if (ip == 1) {
+
+/*                 Last eigenvalue of conjugate pair */
+
+		    cursl = cursl || lastsl;
+		    lastsl = cursl;
+		    if (cursl) {
+			*sdim += 2;
+		    }
+		    ip = -1;
+		    if (cursl && ! lst2sl) {
+			*info = *n + 2;
+		    }
+		} else {
+
+/*                 First eigenvalue of conjugate pair */
+
+		    ip = 1;
+		}
+	    }
+	    lst2sl = lastsl;
+	    lastsl = cursl;
+/* L40: */
+	}
+
+    }
+
+L50:
+
+    work[1] = (real) maxwrk;
+    iwork[1] = liwmin;
+
+    return 0;
+
+/*     End of SGGESX */
+
+} /* sggesx_ */
+
diff --git a/lapack-netlib/SRC/sggev.c b/lapack-netlib/SRC/sggev.c
new file mode 100644
index 000000000..39357ba9e
--- /dev/null
+++ b/lapack-netlib/SRC/sggev.c
@@ -0,0 +1,1099 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+static real c_b36 = 0.f;
+static real c_b37 = 1.f;
+
+/* > \brief <b> SGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matr
+ices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGEV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggev.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggev.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggev.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, */
+/*                         BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) */
+
+/*       CHARACTER          JOBVL, JOBVR */
+/*       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N */
+/*       REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */
+/*      $                   B( LDB, * ), BETA( * ), VL( LDVL, * ), */
+/*      $                   VR( LDVR, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) */
+/* > the generalized eigenvalues, and optionally, the left and/or right */
+/* > generalized eigenvectors. */
+/* > */
+/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */
+/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */
+/* > singular. It is usually represented as the pair (alpha,beta), as */
+/* > there is a reasonable interpretation for beta=0, and even for both */
+/* > being zero. */
+/* > */
+/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */
+/* > of (A,B) satisfies */
+/* > */
+/* >                  A * v(j) = lambda(j) * B * v(j). */
+/* > */
+/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */
+/* > of (A,B) satisfies */
+/* > */
+/* >                  u(j)**H * A  = lambda(j) * u(j)**H * B . */
+/* > */
+/* > where u(j)**H is the conjugate-transpose of u(j). */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVL */
+/* > \verbatim */
+/* >          JOBVL is CHARACTER*1 */
+/* >          = 'N':  do not compute the left generalized eigenvectors; */
+/* >          = 'V':  compute the left generalized eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVR */
+/* > \verbatim */
+/* >          JOBVR is CHARACTER*1 */
+/* >          = 'N':  do not compute the right generalized eigenvectors; */
+/* >          = 'V':  compute the right generalized eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A, B, VL, and VR.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the matrix A in the pair (A,B). */
+/* >          On exit, A has been overwritten. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the matrix B in the pair (A,B). */
+/* >          On exit, B has been overwritten. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAR */
+/* > \verbatim */
+/* >          ALPHAR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAI */
+/* > \verbatim */
+/* >          ALPHAI is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* >          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */
+/* >          be the generalized eigenvalues.  If ALPHAI(j) is zero, then */
+/* >          the j-th eigenvalue is real; if positive, then the j-th and */
+/* >          (j+1)-st eigenvalues are a complex conjugate pair, with */
+/* >          ALPHAI(j+1) negative. */
+/* > */
+/* >          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */
+/* >          may easily over- or underflow, and BETA(j) may even be zero. */
+/* >          Thus, the user should avoid naively computing the ratio */
+/* >          alpha/beta.  However, ALPHAR and ALPHAI will be always less */
+/* >          than and usually comparable with norm(A) in magnitude, and */
+/* >          BETA always less than and usually comparable with norm(B). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VL */
+/* > \verbatim */
+/* >          VL is REAL array, dimension (LDVL,N) */
+/* >          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
+/* >          after another in the columns of VL, in the same order as */
+/* >          their eigenvalues. If the j-th eigenvalue is real, then */
+/* >          u(j) = VL(:,j), the j-th column of VL. If the j-th and */
+/* >          (j+1)-th eigenvalues form a complex conjugate pair, then */
+/* >          u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */
+/* >          Each eigenvector is scaled so the largest component has */
+/* >          abs(real part)+abs(imag. part)=1. */
+/* >          Not referenced if JOBVL = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the matrix VL. LDVL >= 1, and */
+/* >          if JOBVL = 'V', LDVL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VR */
+/* > \verbatim */
+/* >          VR is REAL array, dimension (LDVR,N) */
+/* >          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
+/* >          after another in the columns of VR, in the same order as */
+/* >          their eigenvalues. If the j-th eigenvalue is real, then */
+/* >          v(j) = VR(:,j), the j-th column of VR. If the j-th and */
+/* >          (j+1)-th eigenvalues form a complex conjugate pair, then */
+/* >          v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */
+/* >          Each eigenvector is scaled so the largest component has */
+/* >          abs(real part)+abs(imag. part)=1. */
+/* >          Not referenced if JOBVR = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the matrix VR. LDVR >= 1, and */
+/* >          if JOBVR = 'V', LDVR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,8*N). */
+/* >          For good performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1,...,N: */
+/* >                The QZ iteration failed.  No eigenvectors have been */
+/* >                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */
+/* >                should be correct for j=INFO+1,...,N. */
+/* >          > N:  =N+1: other than QZ iteration failed in SHGEQZ. */
+/* >                =N+2: error return from STGEVC. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sggev_(char *jobvl, char *jobvr, integer *n, real *a, 
+	integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real 
+	*beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, 
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, 
+	    vr_offset, i__1, i__2;
+    real r__1, r__2, r__3, r__4;
+
+    /* Local variables */
+    real anrm, bnrm;
+    integer ierr, itau;
+    real temp;
+    logical ilvl, ilvr;
+    integer iwrk;
+    extern logical lsame_(char *, char *);
+    integer ileft, icols, irows, jc;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer in, jr;
+    extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, integer *
+	    ), sggbal_(char *, integer *, real *, integer *, 
+	    real *, integer *, integer *, integer *, real *, real *, real *, 
+	    integer *);
+    logical ilascl, ilbscl;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgghrd_(
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, real *, integer *, 
+	    integer *);
+    logical ldumma[1];
+    char chtemp[1];
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ijobvl, iright;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    integer ijobvr;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), slaset_(char *, integer *, 
+	    integer *, real *, real *, real *, integer *), stgevc_(
+	    char *, char *, logical *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *, integer *, 
+	    integer *, real *, integer *);
+    real anrmto, bnrmto;
+    extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , real *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *);
+    integer minwrk, maxwrk;
+    real smlnum;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    integer ihi, ilo;
+    real eps;
+    logical ilv;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --alphar;
+    --alphai;
+    --beta;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(jobvl, "N")) {
+	ijobvl = 1;
+	ilvl = FALSE_;
+    } else if (lsame_(jobvl, "V")) {
+	ijobvl = 2;
+	ilvl = TRUE_;
+    } else {
+	ijobvl = -1;
+	ilvl = FALSE_;
+    }
+
+    if (lsame_(jobvr, "N")) {
+	ijobvr = 1;
+	ilvr = FALSE_;
+    } else if (lsame_(jobvr, "V")) {
+	ijobvr = 2;
+	ilvr = TRUE_;
+    } else {
+	ijobvr = -1;
+	ilvr = FALSE_;
+    }
+    ilv = ilvl || ilvr;
+
+/*     Test the input arguments */
+
+    *info = 0;
+    lquery = *lwork == -1;
+    if (ijobvl <= 0) {
+	*info = -1;
+    } else if (ijobvr <= 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldvl < 1 || ilvl && *ldvl < *n) {
+	*info = -12;
+    } else if (*ldvr < 1 || ilvr && *ldvr < *n) {
+	*info = -14;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. The workspace is */
+/*       computed assuming ILO = 1 and IHI = N, the worst case.) */
+
+    if (*info == 0) {
+/* Computing MAX */
+	i__1 = 1, i__2 = *n << 3;
+	minwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+	i__1 = 1, i__2 = *n * (ilaenv_(&c__1, "SGEQRF", " ", n, &c__1, n, &
+		c__0, (ftnlen)6, (ftnlen)1) + 7);
+	maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+	i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "SORMQR", " ", n, &c__1, n,
+		 &c__0, (ftnlen)6, (ftnlen)1) + 7);
+	maxwrk = f2cmax(i__1,i__2);
+	if (ilvl) {
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "SORGQR", " ", n, &
+		    c__1, n, &c_n1, (ftnlen)6, (ftnlen)1) + 7);
+	    maxwrk = f2cmax(i__1,i__2);
+	}
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -16;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGEV ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]);
+    ilascl = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	anrmto = smlnum;
+	ilascl = TRUE_;
+    } else if (anrm > bignum) {
+	anrmto = bignum;
+	ilascl = TRUE_;
+    }
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]);
+    ilbscl = FALSE_;
+    if (bnrm > 0.f && bnrm < smlnum) {
+	bnrmto = smlnum;
+	ilbscl = TRUE_;
+    } else if (bnrm > bignum) {
+	bnrmto = bignum;
+	ilbscl = TRUE_;
+    }
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
+		ierr);
+    }
+
+/*     Permute the matrices A, B to isolate eigenvalues if possible */
+/*     (Workspace: need 6*N) */
+
+    ileft = 1;
+    iright = *n + 1;
+    iwrk = iright + *n;
+    sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[
+	    ileft], &work[iright], &work[iwrk], &ierr);
+
+/*     Reduce B to triangular form (QR decomposition of B) */
+/*     (Workspace: need N, prefer N*NB) */
+
+    irows = ihi + 1 - ilo;
+    if (ilv) {
+	icols = *n + 1 - ilo;
+    } else {
+	icols = irows;
+    }
+    itau = iwrk;
+    iwrk = itau + irows;
+    i__1 = *lwork + 1 - iwrk;
+    sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[
+	    iwrk], &i__1, &ierr);
+
+/*     Apply the orthogonal transformation to matrix A */
+/*     (Workspace: need N, prefer N*NB) */
+
+    i__1 = *lwork + 1 - iwrk;
+    sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, &
+	    work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, &
+	    ierr);
+
+/*     Initialize VL */
+/*     (Workspace: need N, prefer N*NB) */
+
+    if (ilvl) {
+	slaset_("Full", n, n, &c_b36, &c_b37, &vl[vl_offset], ldvl)
+		;
+	if (irows > 1) {
+	    i__1 = irows - 1;
+	    i__2 = irows - 1;
+	    slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[
+		    ilo + 1 + ilo * vl_dim1], ldvl);
+	}
+	i__1 = *lwork + 1 - iwrk;
+	sorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[
+		itau], &work[iwrk], &i__1, &ierr);
+    }
+
+/*     Initialize VR */
+
+    if (ilvr) {
+	slaset_("Full", n, n, &c_b36, &c_b37, &vr[vr_offset], ldvr)
+		;
+    }
+
+/*     Reduce to generalized Hessenberg form */
+/*     (Workspace: none needed) */
+
+    if (ilv) {
+
+/*        Eigenvectors requested -- work on whole matrix. */
+
+	sgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
+		ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr);
+    } else {
+	sgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, 
+		&b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[
+		vr_offset], ldvr, &ierr);
+    }
+
+/*     Perform QZ algorithm (Compute eigenvalues, and optionally, the */
+/*     Schur forms and Schur vectors) */
+/*     (Workspace: need N) */
+
+    iwrk = itau;
+    if (ilv) {
+	*(unsigned char *)chtemp = 'S';
+    } else {
+	*(unsigned char *)chtemp = 'E';
+    }
+    i__1 = *lwork + 1 - iwrk;
+    shgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[
+	    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], 
+	    ldvl, &vr[vr_offset], ldvr, &work[iwrk], &i__1, &ierr);
+    if (ierr != 0) {
+	if (ierr > 0 && ierr <= *n) {
+	    *info = ierr;
+	} else if (ierr > *n && ierr <= *n << 1) {
+	    *info = ierr - *n;
+	} else {
+	    *info = *n + 1;
+	}
+	goto L110;
+    }
+
+/*     Compute Eigenvectors */
+/*     (Workspace: need 6*N) */
+
+    if (ilv) {
+	if (ilvl) {
+	    if (ilvr) {
+		*(unsigned char *)chtemp = 'B';
+	    } else {
+		*(unsigned char *)chtemp = 'L';
+	    }
+	} else {
+	    *(unsigned char *)chtemp = 'R';
+	}
+	stgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, 
+		&vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[
+		iwrk], &ierr);
+	if (ierr != 0) {
+	    *info = *n + 2;
+	    goto L110;
+	}
+
+/*        Undo balancing on VL and VR and normalization */
+/*        (Workspace: none needed) */
+
+	if (ilvl) {
+	    sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &
+		    vl[vl_offset], ldvl, &ierr);
+	    i__1 = *n;
+	    for (jc = 1; jc <= i__1; ++jc) {
+		if (alphai[jc] < 0.f) {
+		    goto L50;
+		}
+		temp = 0.f;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__2 = temp, r__3 = (r__1 = vl[jr + jc * vl_dim1], 
+				abs(r__1));
+			temp = f2cmax(r__2,r__3);
+/* L10: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__3 = temp, r__4 = (r__1 = vl[jr + jc * vl_dim1], 
+				abs(r__1)) + (r__2 = vl[jr + (jc + 1) * 
+				vl_dim1], abs(r__2));
+			temp = f2cmax(r__3,r__4);
+/* L20: */
+		    }
+		}
+		if (temp < smlnum) {
+		    goto L50;
+		}
+		temp = 1.f / temp;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vl[jr + jc * vl_dim1] *= temp;
+/* L30: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vl[jr + jc * vl_dim1] *= temp;
+			vl[jr + (jc + 1) * vl_dim1] *= temp;
+/* L40: */
+		    }
+		}
+L50:
+		;
+	    }
+	}
+	if (ilvr) {
+	    sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &
+		    vr[vr_offset], ldvr, &ierr);
+	    i__1 = *n;
+	    for (jc = 1; jc <= i__1; ++jc) {
+		if (alphai[jc] < 0.f) {
+		    goto L100;
+		}
+		temp = 0.f;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__2 = temp, r__3 = (r__1 = vr[jr + jc * vr_dim1], 
+				abs(r__1));
+			temp = f2cmax(r__2,r__3);
+/* L60: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__3 = temp, r__4 = (r__1 = vr[jr + jc * vr_dim1], 
+				abs(r__1)) + (r__2 = vr[jr + (jc + 1) * 
+				vr_dim1], abs(r__2));
+			temp = f2cmax(r__3,r__4);
+/* L70: */
+		    }
+		}
+		if (temp < smlnum) {
+		    goto L100;
+		}
+		temp = 1.f / temp;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vr[jr + jc * vr_dim1] *= temp;
+/* L80: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vr[jr + jc * vr_dim1] *= temp;
+			vr[jr + (jc + 1) * vr_dim1] *= temp;
+/* L90: */
+		    }
+		}
+L100:
+		;
+	    }
+	}
+
+/*        End of eigenvector calculation */
+
+    }
+
+/*     Undo scaling if necessary */
+
+L110:
+
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
+		ierr);
+    }
+
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
+		ierr);
+    }
+
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGGEV */
+
+} /* sggev_ */
+
diff --git a/lapack-netlib/SRC/sggev3.c b/lapack-netlib/SRC/sggev3.c
new file mode 100644
index 000000000..956ba4391
--- /dev/null
+++ b/lapack-netlib/SRC/sggev3.c
@@ -0,0 +1,1111 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static integer c__0 = 0;
+static real c_b34 = 0.f;
+static real c_b35 = 1.f;
+
+/* > \brief <b> SGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat
+rices (blocked algorithm)</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGEV3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggev3.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggev3.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggev3.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, */
+/*      $                   ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, LWORK, */
+/*      $                   INFO ) */
+
+/*       CHARACTER          JOBVL, JOBVR */
+/*       INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N */
+/*       REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */
+/*      $                   B( LDB, * ), BETA( * ), VL( LDVL, * ), */
+/*      $                   VR( LDVR, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGEV3 computes for a pair of N-by-N real nonsymmetric matrices (A,B) */
+/* > the generalized eigenvalues, and optionally, the left and/or right */
+/* > generalized eigenvectors. */
+/* > */
+/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */
+/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */
+/* > singular. It is usually represented as the pair (alpha,beta), as */
+/* > there is a reasonable interpretation for beta=0, and even for both */
+/* > being zero. */
+/* > */
+/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */
+/* > of (A,B) satisfies */
+/* > */
+/* >                  A * v(j) = lambda(j) * B * v(j). */
+/* > */
+/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */
+/* > of (A,B) satisfies */
+/* > */
+/* >                  u(j)**H * A  = lambda(j) * u(j)**H * B . */
+/* > */
+/* > where u(j)**H is the conjugate-transpose of u(j). */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVL */
+/* > \verbatim */
+/* >          JOBVL is CHARACTER*1 */
+/* >          = 'N':  do not compute the left generalized eigenvectors; */
+/* >          = 'V':  compute the left generalized eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVR */
+/* > \verbatim */
+/* >          JOBVR is CHARACTER*1 */
+/* >          = 'N':  do not compute the right generalized eigenvectors; */
+/* >          = 'V':  compute the right generalized eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A, B, VL, and VR.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the matrix A in the pair (A,B). */
+/* >          On exit, A has been overwritten. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the matrix B in the pair (A,B). */
+/* >          On exit, B has been overwritten. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAR */
+/* > \verbatim */
+/* >          ALPHAR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAI */
+/* > \verbatim */
+/* >          ALPHAI is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* >          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */
+/* >          be the generalized eigenvalues.  If ALPHAI(j) is zero, then */
+/* >          the j-th eigenvalue is real; if positive, then the j-th and */
+/* >          (j+1)-st eigenvalues are a complex conjugate pair, with */
+/* >          ALPHAI(j+1) negative. */
+/* > */
+/* >          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */
+/* >          may easily over- or underflow, and BETA(j) may even be zero. */
+/* >          Thus, the user should avoid naively computing the ratio */
+/* >          alpha/beta.  However, ALPHAR and ALPHAI will be always less */
+/* >          than and usually comparable with norm(A) in magnitude, and */
+/* >          BETA always less than and usually comparable with norm(B). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VL */
+/* > \verbatim */
+/* >          VL is REAL array, dimension (LDVL,N) */
+/* >          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
+/* >          after another in the columns of VL, in the same order as */
+/* >          their eigenvalues. If the j-th eigenvalue is real, then */
+/* >          u(j) = VL(:,j), the j-th column of VL. If the j-th and */
+/* >          (j+1)-th eigenvalues form a complex conjugate pair, then */
+/* >          u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */
+/* >          Each eigenvector is scaled so the largest component has */
+/* >          abs(real part)+abs(imag. part)=1. */
+/* >          Not referenced if JOBVL = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the matrix VL. LDVL >= 1, and */
+/* >          if JOBVL = 'V', LDVL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VR */
+/* > \verbatim */
+/* >          VR is REAL array, dimension (LDVR,N) */
+/* >          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
+/* >          after another in the columns of VR, in the same order as */
+/* >          their eigenvalues. If the j-th eigenvalue is real, then */
+/* >          v(j) = VR(:,j), the j-th column of VR. If the j-th and */
+/* >          (j+1)-th eigenvalues form a complex conjugate pair, then */
+/* >          v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */
+/* >          Each eigenvector is scaled so the largest component has */
+/* >          abs(real part)+abs(imag. part)=1. */
+/* >          Not referenced if JOBVR = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the matrix VR. LDVR >= 1, and */
+/* >          if JOBVR = 'V', LDVR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1,...,N: */
+/* >                The QZ iteration failed.  No eigenvectors have been */
+/* >                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */
+/* >                should be correct for j=INFO+1,...,N. */
+/* >          > N:  =N+1: other than QZ iteration failed in SHGEQZ. */
+/* >                =N+2: error return from STGEVC. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date January 2015 */
+
+/* > \ingroup realGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sggev3_(char *jobvl, char *jobvr, integer *n, real *a, 
+	integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real 
+	*beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, 
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, 
+	    vr_offset, i__1, i__2;
+    real r__1, r__2, r__3, r__4;
+
+    /* Local variables */
+    real anrm, bnrm;
+    integer ierr, itau;
+    real temp;
+    logical ilvl, ilvr;
+    integer iwrk;
+    extern logical lsame_(char *, char *);
+    integer ileft, icols, irows;
+    extern /* Subroutine */ int sgghd3_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, integer *)
+	    ;
+    integer jc;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer in, jr;
+    extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, integer *
+	    ), sggbal_(char *, integer *, real *, integer *, 
+	    real *, integer *, integer *, integer *, real *, real *, real *, 
+	    integer *);
+    logical ilascl, ilbscl;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical ldumma[1];
+    char chtemp[1];
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    integer ijobvl, iright;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    integer ijobvr;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), slaset_(char *, integer *, 
+	    integer *, real *, real *, real *, integer *), stgevc_(
+	    char *, char *, logical *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, real *, integer *, integer *, 
+	    integer *, real *, integer *);
+    real anrmto, bnrmto;
+    extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , real *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *);
+    real smlnum;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    integer lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    integer ihi, ilo;
+    real eps;
+    logical ilv;
+
+
+/*  -- LAPACK driver routine (version 3.6.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     January 2015 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --alphar;
+    --alphai;
+    --beta;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(jobvl, "N")) {
+	ijobvl = 1;
+	ilvl = FALSE_;
+    } else if (lsame_(jobvl, "V")) {
+	ijobvl = 2;
+	ilvl = TRUE_;
+    } else {
+	ijobvl = -1;
+	ilvl = FALSE_;
+    }
+
+    if (lsame_(jobvr, "N")) {
+	ijobvr = 1;
+	ilvr = FALSE_;
+    } else if (lsame_(jobvr, "V")) {
+	ijobvr = 2;
+	ilvr = TRUE_;
+    } else {
+	ijobvr = -1;
+	ilvr = FALSE_;
+    }
+    ilv = ilvl || ilvr;
+
+/*     Test the input arguments */
+
+    *info = 0;
+    lquery = *lwork == -1;
+    if (ijobvl <= 0) {
+	*info = -1;
+    } else if (ijobvr <= 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldvl < 1 || ilvl && *ldvl < *n) {
+	*info = -12;
+    } else if (*ldvr < 1 || ilvr && *ldvr < *n) {
+	*info = -14;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = *n << 3;
+	if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
+	    *info = -16;
+	}
+    }
+
+/*     Compute workspace */
+
+    if (*info == 0) {
+	sgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr);
+/* Computing MAX */
+	i__1 = 1, i__2 = *n << 3, i__1 = f2cmax(i__1,i__2), i__2 = *n * 3 + (
+		integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	sormqr_("L", "T", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], 
+		lda, &work[1], &c_n1, &ierr);
+/* Computing MAX */
+	i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	sgghd3_(jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[b_offset], 
+		ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[1], &
+		c_n1, &ierr);
+/* Computing MAX */
+	i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	if (ilvl) {
+	    sorgqr_(n, n, n, &vl[vl_offset], ldvl, &work[1], &work[1], &c_n1, 
+		    &ierr);
+/* Computing MAX */
+	    i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1];
+	    lwkopt = f2cmax(i__1,i__2);
+	    shgeqz_("S", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[
+		    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[
+		    vl_offset], ldvl, &vr[vr_offset], ldvr, &work[1], &c_n1, &
+		    ierr);
+/* Computing MAX */
+	    i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1];
+	    lwkopt = f2cmax(i__1,i__2);
+	} else {
+	    shgeqz_("E", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[
+		    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[
+		    vl_offset], ldvl, &vr[vr_offset], ldvr, &work[1], &c_n1, &
+		    ierr);
+/* Computing MAX */
+	    i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1];
+	    lwkopt = f2cmax(i__1,i__2);
+	}
+	work[1] = (real) lwkopt;
+
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGEV3 ", &i__1, (ftnlen)7);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]);
+    ilascl = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	anrmto = smlnum;
+	ilascl = TRUE_;
+    } else if (anrm > bignum) {
+	anrmto = bignum;
+	ilascl = TRUE_;
+    }
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]);
+    ilbscl = FALSE_;
+    if (bnrm > 0.f && bnrm < smlnum) {
+	bnrmto = smlnum;
+	ilbscl = TRUE_;
+    } else if (bnrm > bignum) {
+	bnrmto = bignum;
+	ilbscl = TRUE_;
+    }
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
+		ierr);
+    }
+
+/*     Permute the matrices A, B to isolate eigenvalues if possible */
+
+    ileft = 1;
+    iright = *n + 1;
+    iwrk = iright + *n;
+    sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[
+	    ileft], &work[iright], &work[iwrk], &ierr);
+
+/*     Reduce B to triangular form (QR decomposition of B) */
+
+    irows = ihi + 1 - ilo;
+    if (ilv) {
+	icols = *n + 1 - ilo;
+    } else {
+	icols = irows;
+    }
+    itau = iwrk;
+    iwrk = itau + irows;
+    i__1 = *lwork + 1 - iwrk;
+    sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[
+	    iwrk], &i__1, &ierr);
+
+/*     Apply the orthogonal transformation to matrix A */
+
+    i__1 = *lwork + 1 - iwrk;
+    sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, &
+	    work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, &
+	    ierr);
+
+/*     Initialize VL */
+
+    if (ilvl) {
+	slaset_("Full", n, n, &c_b34, &c_b35, &vl[vl_offset], ldvl)
+		;
+	if (irows > 1) {
+	    i__1 = irows - 1;
+	    i__2 = irows - 1;
+	    slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[
+		    ilo + 1 + ilo * vl_dim1], ldvl);
+	}
+	i__1 = *lwork + 1 - iwrk;
+	sorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[
+		itau], &work[iwrk], &i__1, &ierr);
+    }
+
+/*     Initialize VR */
+
+    if (ilvr) {
+	slaset_("Full", n, n, &c_b34, &c_b35, &vr[vr_offset], ldvr)
+		;
+    }
+
+/*     Reduce to generalized Hessenberg form */
+
+    if (ilv) {
+
+/*        Eigenvectors requested -- work on whole matrix. */
+
+	i__1 = *lwork + 1 - iwrk;
+	sgghd3_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
+		ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[iwrk], 
+		&i__1, &ierr);
+    } else {
+	i__1 = *lwork + 1 - iwrk;
+	sgghd3_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, 
+		&b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, &ierr);
+    }
+
+/*     Perform QZ algorithm (Compute eigenvalues, and optionally, the */
+/*     Schur forms and Schur vectors) */
+
+    iwrk = itau;
+    if (ilv) {
+	*(unsigned char *)chtemp = 'S';
+    } else {
+	*(unsigned char *)chtemp = 'E';
+    }
+    i__1 = *lwork + 1 - iwrk;
+    shgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[
+	    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], 
+	    ldvl, &vr[vr_offset], ldvr, &work[iwrk], &i__1, &ierr);
+    if (ierr != 0) {
+	if (ierr > 0 && ierr <= *n) {
+	    *info = ierr;
+	} else if (ierr > *n && ierr <= *n << 1) {
+	    *info = ierr - *n;
+	} else {
+	    *info = *n + 1;
+	}
+	goto L110;
+    }
+
+/*     Compute Eigenvectors */
+
+    if (ilv) {
+	if (ilvl) {
+	    if (ilvr) {
+		*(unsigned char *)chtemp = 'B';
+	    } else {
+		*(unsigned char *)chtemp = 'L';
+	    }
+	} else {
+	    *(unsigned char *)chtemp = 'R';
+	}
+	stgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, 
+		&vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[
+		iwrk], &ierr);
+	if (ierr != 0) {
+	    *info = *n + 2;
+	    goto L110;
+	}
+
+/*        Undo balancing on VL and VR and normalization */
+
+	if (ilvl) {
+	    sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &
+		    vl[vl_offset], ldvl, &ierr);
+	    i__1 = *n;
+	    for (jc = 1; jc <= i__1; ++jc) {
+		if (alphai[jc] < 0.f) {
+		    goto L50;
+		}
+		temp = 0.f;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__2 = temp, r__3 = (r__1 = vl[jr + jc * vl_dim1], 
+				abs(r__1));
+			temp = f2cmax(r__2,r__3);
+/* L10: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__3 = temp, r__4 = (r__1 = vl[jr + jc * vl_dim1], 
+				abs(r__1)) + (r__2 = vl[jr + (jc + 1) * 
+				vl_dim1], abs(r__2));
+			temp = f2cmax(r__3,r__4);
+/* L20: */
+		    }
+		}
+		if (temp < smlnum) {
+		    goto L50;
+		}
+		temp = 1.f / temp;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vl[jr + jc * vl_dim1] *= temp;
+/* L30: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vl[jr + jc * vl_dim1] *= temp;
+			vl[jr + (jc + 1) * vl_dim1] *= temp;
+/* L40: */
+		    }
+		}
+L50:
+		;
+	    }
+	}
+	if (ilvr) {
+	    sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &
+		    vr[vr_offset], ldvr, &ierr);
+	    i__1 = *n;
+	    for (jc = 1; jc <= i__1; ++jc) {
+		if (alphai[jc] < 0.f) {
+		    goto L100;
+		}
+		temp = 0.f;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__2 = temp, r__3 = (r__1 = vr[jr + jc * vr_dim1], 
+				abs(r__1));
+			temp = f2cmax(r__2,r__3);
+/* L60: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+			r__3 = temp, r__4 = (r__1 = vr[jr + jc * vr_dim1], 
+				abs(r__1)) + (r__2 = vr[jr + (jc + 1) * 
+				vr_dim1], abs(r__2));
+			temp = f2cmax(r__3,r__4);
+/* L70: */
+		    }
+		}
+		if (temp < smlnum) {
+		    goto L100;
+		}
+		temp = 1.f / temp;
+		if (alphai[jc] == 0.f) {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vr[jr + jc * vr_dim1] *= temp;
+/* L80: */
+		    }
+		} else {
+		    i__2 = *n;
+		    for (jr = 1; jr <= i__2; ++jr) {
+			vr[jr + jc * vr_dim1] *= temp;
+			vr[jr + (jc + 1) * vr_dim1] *= temp;
+/* L90: */
+		    }
+		}
+L100:
+		;
+	    }
+	}
+
+/*        End of eigenvector calculation */
+
+    }
+
+/*     Undo scaling if necessary */
+
+L110:
+
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
+		ierr);
+    }
+
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
+		ierr);
+    }
+
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SGGEV3 */
+
+} /* sggev3_ */
+
diff --git a/lapack-netlib/SRC/sggevx.c b/lapack-netlib/SRC/sggevx.c
new file mode 100644
index 000000000..644b09bf6
--- /dev/null
+++ b/lapack-netlib/SRC/sggevx.c
@@ -0,0 +1,1385 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static real c_b57 = 0.f;
+static real c_b58 = 1.f;
+
+/* > \brief <b> SGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat
+rices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGEVX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggevx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggevx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggevx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, */
+/*                          ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, */
+/*                          IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, */
+/*                          RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) */
+
+/*       CHARACTER          BALANC, JOBVL, JOBVR, SENSE */
+/*       INTEGER            IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N */
+/*       REAL               ABNRM, BBNRM */
+/*       LOGICAL            BWORK( * ) */
+/*       INTEGER            IWORK( * ) */
+/*       REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */
+/*      $                   B( LDB, * ), BETA( * ), LSCALE( * ), */
+/*      $                   RCONDE( * ), RCONDV( * ), RSCALE( * ), */
+/*      $                   VL( LDVL, * ), VR( LDVR, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) */
+/* > the generalized eigenvalues, and optionally, the left and/or right */
+/* > generalized eigenvectors. */
+/* > */
+/* > Optionally also, it computes a balancing transformation to improve */
+/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */
+/* > LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for */
+/* > the eigenvalues (RCONDE), and reciprocal condition numbers for the */
+/* > right eigenvectors (RCONDV). */
+/* > */
+/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */
+/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */
+/* > singular. It is usually represented as the pair (alpha,beta), as */
+/* > there is a reasonable interpretation for beta=0, and even for both */
+/* > being zero. */
+/* > */
+/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */
+/* > of (A,B) satisfies */
+/* > */
+/* >                  A * v(j) = lambda(j) * B * v(j) . */
+/* > */
+/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */
+/* > of (A,B) satisfies */
+/* > */
+/* >                  u(j)**H * A  = lambda(j) * u(j)**H * B. */
+/* > */
+/* > where u(j)**H is the conjugate-transpose of u(j). */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] BALANC */
+/* > \verbatim */
+/* >          BALANC is CHARACTER*1 */
+/* >          Specifies the balance option to be performed. */
+/* >          = 'N':  do not diagonally scale or permute; */
+/* >          = 'P':  permute only; */
+/* >          = 'S':  scale only; */
+/* >          = 'B':  both permute and scale. */
+/* >          Computed reciprocal condition numbers will be for the */
+/* >          matrices after permuting and/or balancing. Permuting does */
+/* >          not change condition numbers (in exact arithmetic), but */
+/* >          balancing does. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVL */
+/* > \verbatim */
+/* >          JOBVL is CHARACTER*1 */
+/* >          = 'N':  do not compute the left generalized eigenvectors; */
+/* >          = 'V':  compute the left generalized eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVR */
+/* > \verbatim */
+/* >          JOBVR is CHARACTER*1 */
+/* >          = 'N':  do not compute the right generalized eigenvectors; */
+/* >          = 'V':  compute the right generalized eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SENSE */
+/* > \verbatim */
+/* >          SENSE is CHARACTER*1 */
+/* >          Determines which reciprocal condition numbers are computed. */
+/* >          = 'N': none are computed; */
+/* >          = 'E': computed for eigenvalues only; */
+/* >          = 'V': computed for eigenvectors only; */
+/* >          = 'B': computed for eigenvalues and eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A, B, VL, and VR.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the matrix A in the pair (A,B). */
+/* >          On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' */
+/* >          or both, then A contains the first part of the real Schur */
+/* >          form of the "balanced" versions of the input A and B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the matrix B in the pair (A,B). */
+/* >          On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' */
+/* >          or both, then B contains the second part of the real Schur */
+/* >          form of the "balanced" versions of the input A and B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAR */
+/* > \verbatim */
+/* >          ALPHAR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAI */
+/* > \verbatim */
+/* >          ALPHAI is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* >          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */
+/* >          be the generalized eigenvalues.  If ALPHAI(j) is zero, then */
+/* >          the j-th eigenvalue is real; if positive, then the j-th and */
+/* >          (j+1)-st eigenvalues are a complex conjugate pair, with */
+/* >          ALPHAI(j+1) negative. */
+/* > */
+/* >          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */
+/* >          may easily over- or underflow, and BETA(j) may even be zero. */
+/* >          Thus, the user should avoid naively computing the ratio */
+/* >          ALPHA/BETA. However, ALPHAR and ALPHAI will be always less */
+/* >          than and usually comparable with norm(A) in magnitude, and */
+/* >          BETA always less than and usually comparable with norm(B). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VL */
+/* > \verbatim */
+/* >          VL is REAL array, dimension (LDVL,N) */
+/* >          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
+/* >          after another in the columns of VL, in the same order as */
+/* >          their eigenvalues. If the j-th eigenvalue is real, then */
+/* >          u(j) = VL(:,j), the j-th column of VL. If the j-th and */
+/* >          (j+1)-th eigenvalues form a complex conjugate pair, then */
+/* >          u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */
+/* >          Each eigenvector will be scaled so the largest component have */
+/* >          abs(real part) + abs(imag. part) = 1. */
+/* >          Not referenced if JOBVL = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the matrix VL. LDVL >= 1, and */
+/* >          if JOBVL = 'V', LDVL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VR */
+/* > \verbatim */
+/* >          VR is REAL array, dimension (LDVR,N) */
+/* >          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
+/* >          after another in the columns of VR, in the same order as */
+/* >          their eigenvalues. If the j-th eigenvalue is real, then */
+/* >          v(j) = VR(:,j), the j-th column of VR. If the j-th and */
+/* >          (j+1)-th eigenvalues form a complex conjugate pair, then */
+/* >          v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */
+/* >          Each eigenvector will be scaled so the largest component have */
+/* >          abs(real part) + abs(imag. part) = 1. */
+/* >          Not referenced if JOBVR = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the matrix VR. LDVR >= 1, and */
+/* >          if JOBVR = 'V', LDVR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          ILO and IHI are integer values such that on exit */
+/* >          A(i,j) = 0 and B(i,j) = 0 if i > j and */
+/* >          j = 1,...,ILO-1 or i = IHI+1,...,N. */
+/* >          If BALANC = 'N' or 'S', ILO = 1 and IHI = N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] LSCALE */
+/* > \verbatim */
+/* >          LSCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied */
+/* >          to the left side of A and B.  If PL(j) is the index of the */
+/* >          row interchanged with row j, and DL(j) is the scaling */
+/* >          factor applied to row j, then */
+/* >            LSCALE(j) = PL(j)  for j = 1,...,ILO-1 */
+/* >                      = DL(j)  for j = ILO,...,IHI */
+/* >                      = PL(j)  for j = IHI+1,...,N. */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RSCALE */
+/* > \verbatim */
+/* >          RSCALE is REAL array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied */
+/* >          to the right side of A and B.  If PR(j) is the index of the */
+/* >          column interchanged with column j, and DR(j) is the scaling */
+/* >          factor applied to column j, then */
+/* >            RSCALE(j) = PR(j)  for j = 1,...,ILO-1 */
+/* >                      = DR(j)  for j = ILO,...,IHI */
+/* >                      = PR(j)  for j = IHI+1,...,N */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ABNRM */
+/* > \verbatim */
+/* >          ABNRM is REAL */
+/* >          The one-norm of the balanced matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BBNRM */
+/* > \verbatim */
+/* >          BBNRM is REAL */
+/* >          The one-norm of the balanced matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDE */
+/* > \verbatim */
+/* >          RCONDE is REAL array, dimension (N) */
+/* >          If SENSE = 'E' or 'B', the reciprocal condition numbers of */
+/* >          the eigenvalues, stored in consecutive elements of the array. */
+/* >          For a complex conjugate pair of eigenvalues two consecutive */
+/* >          elements of RCONDE are set to the same value. Thus RCONDE(j), */
+/* >          RCONDV(j), and the j-th columns of VL and VR all correspond */
+/* >          to the j-th eigenpair. */
+/* >          If SENSE = 'N' or 'V', RCONDE is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDV */
+/* > \verbatim */
+/* >          RCONDV is REAL array, dimension (N) */
+/* >          If SENSE = 'V' or 'B', the estimated reciprocal condition */
+/* >          numbers of the eigenvectors, stored in consecutive elements */
+/* >          of the array. For a complex eigenvector two consecutive */
+/* >          elements of RCONDV are set to the same value. If the */
+/* >          eigenvalues cannot be reordered to compute RCONDV(j), */
+/* >          RCONDV(j) is set to 0; this can only occur when the true */
+/* >          value would be very small anyway. */
+/* >          If SENSE = 'N' or 'E', RCONDV is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */
+/* >          If BALANC = 'S' or 'B', or JOBVL = 'V', or JOBVR = 'V', */
+/* >          LWORK >= f2cmax(1,6*N). */
+/* >          If SENSE = 'E', LWORK >= f2cmax(1,10*N). */
+/* >          If SENSE = 'V' or 'B', LWORK >= 2*N*N+8*N+16. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N+6) */
+/* >          If SENSE = 'E', IWORK is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          If SENSE = 'N', BWORK is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1,...,N: */
+/* >                The QZ iteration failed.  No eigenvectors have been */
+/* >                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */
+/* >                should be correct for j=INFO+1,...,N. */
+/* >          > N:  =N+1: other than QZ iteration failed in SHGEQZ. */
+/* >                =N+2: error return from STGEVC. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup realGEeigen */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  Balancing a matrix pair (A,B) includes, first, permuting rows and */
+/* >  columns to isolate eigenvalues, second, applying diagonal similarity */
+/* >  transformation to the rows and columns to make the rows and columns */
+/* >  as close in norm as possible. The computed reciprocal condition */
+/* >  numbers correspond to the balanced matrix. Permuting rows and columns */
+/* >  will not change the condition numbers (in exact arithmetic) but */
+/* >  diagonal scaling will.  For further explanation of balancing, see */
+/* >  section 4.11.1.2 of LAPACK Users' Guide. */
+/* > */
+/* >  An approximate error bound on the chordal distance between the i-th */
+/* >  computed generalized eigenvalue w and the corresponding exact */
+/* >  eigenvalue lambda is */
+/* > */
+/* >       chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) */
+/* > */
+/* >  An approximate error bound for the angle between the i-th computed */
+/* >  eigenvector VL(i) or VR(i) is given by */
+/* > */
+/* >       EPS * norm(ABNRM, BBNRM) / DIF(i). */
+/* > */
+/* >  For further explanation of the reciprocal condition numbers RCONDE */
+/* >  and RCONDV, see section 4.11 of LAPACK User's Guide. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggevx_(char *balanc, char *jobvl, char *jobvr, char *
+	sense, integer *n, real *a, integer *lda, real *b, integer *ldb, real 
+	*alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, 
+	integer *ldvr, integer *ilo, integer *ihi, real *lscale, real *rscale,
+	 real *abnrm, real *bbnrm, real *rconde, real *rcondv, real *work, 
+	integer *lwork, integer *iwork, logical *bwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, 
+	    vr_offset, i__1, i__2;
+    real r__1, r__2, r__3, r__4;
+
+    /* Local variables */
+    logical pair;
+    real anrm, bnrm;
+    integer ierr, itau;
+    real temp;
+    logical ilvl, ilvr;
+    integer iwrk, iwrk1, i__, j, m;
+    extern logical lsame_(char *, char *);
+    integer icols;
+    logical noscl;
+    integer irows, jc;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    integer in, mm, jr;
+    extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, integer *
+	    ), sggbal_(char *, integer *, real *, integer *, 
+	    real *, integer *, integer *, integer *, real *, real *, real *, 
+	    integer *);
+    logical ilascl, ilbscl;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgghrd_(
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, real *, integer *, 
+	    integer *);
+    logical ldumma[1];
+    char chtemp[1];
+    real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern real slamch_(char *);
+    integer ijobvl;
+    extern real slange_(char *, integer *, integer *, real *, integer *, real 
+	    *);
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *);
+    integer ijobvr;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *);
+    logical wantsb;
+    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, 
+	    real *, real *, integer *);
+    real anrmto;
+    logical wantse;
+    real bnrmto;
+    extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , real *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, integer *), stgevc_(char *, 
+	    char *, logical *, integer *, real *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, integer *, integer *, 
+	    real *, integer *), stgsna_(char *, char *, 
+	    logical *, integer *, real *, integer *, real *, integer *, real *
+	    , integer *, real *, integer *, real *, real *, integer *, 
+	    integer *, real *, integer *, integer *, integer *);
+    integer minwrk, maxwrk;
+    logical wantsn;
+    real smlnum;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, integer *);
+    logical lquery, wantsv;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    real eps;
+    logical ilv;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --alphar;
+    --alphai;
+    --beta;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --lscale;
+    --rscale;
+    --rconde;
+    --rcondv;
+    --work;
+    --iwork;
+    --bwork;
+
+    /* Function Body */
+    if (lsame_(jobvl, "N")) {
+	ijobvl = 1;
+	ilvl = FALSE_;
+    } else if (lsame_(jobvl, "V")) {
+	ijobvl = 2;
+	ilvl = TRUE_;
+    } else {
+	ijobvl = -1;
+	ilvl = FALSE_;
+    }
+
+    if (lsame_(jobvr, "N")) {
+	ijobvr = 1;
+	ilvr = FALSE_;
+    } else if (lsame_(jobvr, "V")) {
+	ijobvr = 2;
+	ilvr = TRUE_;
+    } else {
+	ijobvr = -1;
+	ilvr = FALSE_;
+    }
+    ilv = ilvl || ilvr;
+
+    noscl = lsame_(balanc, "N") || lsame_(balanc, "P");
+    wantsn = lsame_(sense, "N");
+    wantse = lsame_(sense, "E");
+    wantsv = lsame_(sense, "V");
+    wantsb = lsame_(sense, "B");
+
+/*     Test the input arguments */
+
+    *info = 0;
+    lquery = *lwork == -1;
+    if (! (noscl || lsame_(balanc, "S") || lsame_(
+	    balanc, "B"))) {
+	*info = -1;
+    } else if (ijobvl <= 0) {
+	*info = -2;
+    } else if (ijobvr <= 0) {
+	*info = -3;
+    } else if (! (wantsn || wantse || wantsb || wantsv)) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -9;
+    } else if (*ldvl < 1 || ilvl && *ldvl < *n) {
+	*info = -14;
+    } else if (*ldvr < 1 || ilvr && *ldvr < *n) {
+	*info = -16;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. The workspace is */
+/*       computed assuming ILO = 1 and IHI = N, the worst case.) */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    minwrk = 1;
+	    maxwrk = 1;
+	} else {
+	    if (noscl && ! ilv) {
+		minwrk = *n << 1;
+	    } else {
+		minwrk = *n * 6;
+	    }
+	    if (wantse) {
+		minwrk = *n * 10;
+	    } else if (wantsv || wantsb) {
+		minwrk = (*n << 1) * (*n + 4) + 16;
+	    }
+	    maxwrk = minwrk;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", n, &
+		    c__1, n, &c__0, (ftnlen)6, (ftnlen)1);
+	    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SORMQR", " ", n, &
+		    c__1, n, &c__0, (ftnlen)6, (ftnlen)1);
+	    maxwrk = f2cmax(i__1,i__2);
+	    if (ilvl) {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", 
+			" ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1);
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	}
+	work[1] = (real) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -26;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGEVX", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]);
+    ilascl = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	anrmto = smlnum;
+	ilascl = TRUE_;
+    } else if (anrm > bignum) {
+	anrmto = bignum;
+	ilascl = TRUE_;
+    }
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]);
+    ilbscl = FALSE_;
+    if (bnrm > 0.f && bnrm < smlnum) {
+	bnrmto = smlnum;
+	ilbscl = TRUE_;
+    } else if (bnrm > bignum) {
+	bnrmto = bignum;
+	ilbscl = TRUE_;
+    }
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
+		ierr);
+    }
+
+/*     Permute and/or balance the matrix pair (A,B) */
+/*     (Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) */
+
+    sggbal_(balanc, n, &a[a_offset], lda, &b[b_offset], ldb, ilo, ihi, &
+	    lscale[1], &rscale[1], &work[1], &ierr);
+
+/*     Compute ABNRM and BBNRM */
+
+    *abnrm = slange_("1", n, n, &a[a_offset], lda, &work[1]);
+    if (ilascl) {
+	work[1] = *abnrm;
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, &c__1, &c__1, &work[1], &
+		c__1, &ierr);
+	*abnrm = work[1];
+    }
+
+    *bbnrm = slange_("1", n, n, &b[b_offset], ldb, &work[1]);
+    if (ilbscl) {
+	work[1] = *bbnrm;
+	slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, &c__1, &c__1, &work[1], &
+		c__1, &ierr);
+	*bbnrm = work[1];
+    }
+
+/*     Reduce B to triangular form (QR decomposition of B) */
+/*     (Workspace: need N, prefer N*NB ) */
+
+    irows = *ihi + 1 - *ilo;
+    if (ilv || ! wantsn) {
+	icols = *n + 1 - *ilo;
+    } else {
+	icols = irows;
+    }
+    itau = 1;
+    iwrk = itau + irows;
+    i__1 = *lwork + 1 - iwrk;
+    sgeqrf_(&irows, &icols, &b[*ilo + *ilo * b_dim1], ldb, &work[itau], &work[
+	    iwrk], &i__1, &ierr);
+
+/*     Apply the orthogonal transformation to A */
+/*     (Workspace: need N, prefer N*NB) */
+
+    i__1 = *lwork + 1 - iwrk;
+    sormqr_("L", "T", &irows, &icols, &irows, &b[*ilo + *ilo * b_dim1], ldb, &
+	    work[itau], &a[*ilo + *ilo * a_dim1], lda, &work[iwrk], &i__1, &
+	    ierr);
+
+/*     Initialize VL and/or VR */
+/*     (Workspace: need N, prefer N*NB) */
+
+    if (ilvl) {
+	slaset_("Full", n, n, &c_b57, &c_b58, &vl[vl_offset], ldvl)
+		;
+	if (irows > 1) {
+	    i__1 = irows - 1;
+	    i__2 = irows - 1;
+	    slacpy_("L", &i__1, &i__2, &b[*ilo + 1 + *ilo * b_dim1], ldb, &vl[
+		    *ilo + 1 + *ilo * vl_dim1], ldvl);
+	}
+	i__1 = *lwork + 1 - iwrk;
+	sorgqr_(&irows, &irows, &irows, &vl[*ilo + *ilo * vl_dim1], ldvl, &
+		work[itau], &work[iwrk], &i__1, &ierr);
+    }
+
+    if (ilvr) {
+	slaset_("Full", n, n, &c_b57, &c_b58, &vr[vr_offset], ldvr)
+		;
+    }
+
+/*     Reduce to generalized Hessenberg form */
+/*     (Workspace: none needed) */
+
+    if (ilv || ! wantsn) {
+
+/*        Eigenvectors requested -- work on whole matrix. */
+
+	sgghrd_(jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset], 
+		ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr);
+    } else {
+	sgghrd_("N", "N", &irows, &c__1, &irows, &a[*ilo + *ilo * a_dim1], 
+		lda, &b[*ilo + *ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[
+		vr_offset], ldvr, &ierr);
+    }
+
+/*     Perform QZ algorithm (Compute eigenvalues, and optionally, the */
+/*     Schur forms and Schur vectors) */
+/*     (Workspace: need N) */
+
+    if (ilv || ! wantsn) {
+	*(unsigned char *)chtemp = 'S';
+    } else {
+	*(unsigned char *)chtemp = 'E';
+    }
+
+    shgeqz_(chtemp, jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset]
+	    , ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], ldvl, &
+	    vr[vr_offset], ldvr, &work[1], lwork, &ierr);
+    if (ierr != 0) {
+	if (ierr > 0 && ierr <= *n) {
+	    *info = ierr;
+	} else if (ierr > *n && ierr <= *n << 1) {
+	    *info = ierr - *n;
+	} else {
+	    *info = *n + 1;
+	}
+	goto L130;
+    }
+
+/*     Compute Eigenvectors and estimate condition numbers if desired */
+/*     (Workspace: STGEVC: need 6*N */
+/*                 STGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', */
+/*                         need N otherwise ) */
+
+    if (ilv || ! wantsn) {
+	if (ilv) {
+	    if (ilvl) {
+		if (ilvr) {
+		    *(unsigned char *)chtemp = 'B';
+		} else {
+		    *(unsigned char *)chtemp = 'L';
+		}
+	    } else {
+		*(unsigned char *)chtemp = 'R';
+	    }
+
+	    stgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], 
+		    ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &
+		    work[1], &ierr);
+	    if (ierr != 0) {
+		*info = *n + 2;
+		goto L130;
+	    }
+	}
+
+	if (! wantsn) {
+
+/*           compute eigenvectors (STGEVC) and estimate condition */
+/*           numbers (STGSNA). Note that the definition of the condition */
+/*           number is not invariant under transformation (u,v) to */
+/*           (Q*u, Z*v), where (u,v) are eigenvectors of the generalized */
+/*           Schur form (S,T), Q and Z are orthogonal matrices. In order */
+/*           to avoid using extra 2*N*N workspace, we have to recalculate */
+/*           eigenvectors and estimate one condition numbers at a time. */
+
+	    pair = FALSE_;
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+
+		if (pair) {
+		    pair = FALSE_;
+		    goto L20;
+		}
+		mm = 1;
+		if (i__ < *n) {
+		    if (a[i__ + 1 + i__ * a_dim1] != 0.f) {
+			pair = TRUE_;
+			mm = 2;
+		    }
+		}
+
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    bwork[j] = FALSE_;
+/* L10: */
+		}
+		if (mm == 1) {
+		    bwork[i__] = TRUE_;
+		} else if (mm == 2) {
+		    bwork[i__] = TRUE_;
+		    bwork[i__ + 1] = TRUE_;
+		}
+
+		iwrk = mm * *n + 1;
+		iwrk1 = iwrk + mm * *n;
+
+/*              Compute a pair of left and right eigenvectors. */
+/*              (compute workspace: need up to 4*N + 6*N) */
+
+		if (wantse || wantsb) {
+		    stgevc_("B", "S", &bwork[1], n, &a[a_offset], lda, &b[
+			    b_offset], ldb, &work[1], n, &work[iwrk], n, &mm, 
+			    &m, &work[iwrk1], &ierr);
+		    if (ierr != 0) {
+			*info = *n + 2;
+			goto L130;
+		    }
+		}
+
+		i__2 = *lwork - iwrk1 + 1;
+		stgsna_(sense, "S", &bwork[1], n, &a[a_offset], lda, &b[
+			b_offset], ldb, &work[1], n, &work[iwrk], n, &rconde[
+			i__], &rcondv[i__], &mm, &m, &work[iwrk1], &i__2, &
+			iwork[1], &ierr);
+
+L20:
+		;
+	    }
+	}
+    }
+
+/*     Undo balancing on VL and VR and normalization */
+/*     (Workspace: none needed) */
+
+    if (ilvl) {
+	sggbak_(balanc, "L", n, ilo, ihi, &lscale[1], &rscale[1], n, &vl[
+		vl_offset], ldvl, &ierr);
+
+	i__1 = *n;
+	for (jc = 1; jc <= i__1; ++jc) {
+	    if (alphai[jc] < 0.f) {
+		goto L70;
+	    }
+	    temp = 0.f;
+	    if (alphai[jc] == 0.f) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+		    r__2 = temp, r__3 = (r__1 = vl[jr + jc * vl_dim1], abs(
+			    r__1));
+		    temp = f2cmax(r__2,r__3);
+/* L30: */
+		}
+	    } else {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+		    r__3 = temp, r__4 = (r__1 = vl[jr + jc * vl_dim1], abs(
+			    r__1)) + (r__2 = vl[jr + (jc + 1) * vl_dim1], abs(
+			    r__2));
+		    temp = f2cmax(r__3,r__4);
+/* L40: */
+		}
+	    }
+	    if (temp < smlnum) {
+		goto L70;
+	    }
+	    temp = 1.f / temp;
+	    if (alphai[jc] == 0.f) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    vl[jr + jc * vl_dim1] *= temp;
+/* L50: */
+		}
+	    } else {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    vl[jr + jc * vl_dim1] *= temp;
+		    vl[jr + (jc + 1) * vl_dim1] *= temp;
+/* L60: */
+		}
+	    }
+L70:
+	    ;
+	}
+    }
+    if (ilvr) {
+	sggbak_(balanc, "R", n, ilo, ihi, &lscale[1], &rscale[1], n, &vr[
+		vr_offset], ldvr, &ierr);
+	i__1 = *n;
+	for (jc = 1; jc <= i__1; ++jc) {
+	    if (alphai[jc] < 0.f) {
+		goto L120;
+	    }
+	    temp = 0.f;
+	    if (alphai[jc] == 0.f) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+		    r__2 = temp, r__3 = (r__1 = vr[jr + jc * vr_dim1], abs(
+			    r__1));
+		    temp = f2cmax(r__2,r__3);
+/* L80: */
+		}
+	    } else {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+/* Computing MAX */
+		    r__3 = temp, r__4 = (r__1 = vr[jr + jc * vr_dim1], abs(
+			    r__1)) + (r__2 = vr[jr + (jc + 1) * vr_dim1], abs(
+			    r__2));
+		    temp = f2cmax(r__3,r__4);
+/* L90: */
+		}
+	    }
+	    if (temp < smlnum) {
+		goto L120;
+	    }
+	    temp = 1.f / temp;
+	    if (alphai[jc] == 0.f) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    vr[jr + jc * vr_dim1] *= temp;
+/* L100: */
+		}
+	    } else {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    vr[jr + jc * vr_dim1] *= temp;
+		    vr[jr + (jc + 1) * vr_dim1] *= temp;
+/* L110: */
+		}
+	    }
+L120:
+	    ;
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L130:
+
+    if (ilascl) {
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
+		ierr);
+	slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
+		ierr);
+    }
+
+    if (ilbscl) {
+	slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
+		ierr);
+    }
+
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGGEVX */
+
+} /* sggevx_ */
+
diff --git a/lapack-netlib/SRC/sggglm.c b/lapack-netlib/SRC/sggglm.c
new file mode 100644
index 000000000..d2a47c68b
--- /dev/null
+++ b/lapack-netlib/SRC/sggglm.c
@@ -0,0 +1,787 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static real c_b32 = -1.f;
+static real c_b34 = 1.f;
+
+/* > \brief \b SGGGLM */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGGLM + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggglm.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggglm.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggglm.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, */
+/*                          INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
+/*       REAL               A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), */
+/*      $                   X( * ), Y( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGGLM solves a general Gauss-Markov linear model (GLM) problem: */
+/* > */
+/* >         minimize || y ||_2   subject to   d = A*x + B*y */
+/* >             x */
+/* > */
+/* > where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */
+/* > given N-vector. It is assumed that M <= N <= M+P, and */
+/* > */
+/* >            rank(A) = M    and    rank( A B ) = N. */
+/* > */
+/* > Under these assumptions, the constrained equation is always */
+/* > consistent, and there is a unique solution x and a minimal 2-norm */
+/* > solution y, which is obtained using a generalized QR factorization */
+/* > of the matrices (A, B) given by */
+/* > */
+/* >    A = Q*(R),   B = Q*T*Z. */
+/* >          (0) */
+/* > */
+/* > In particular, if matrix B is square nonsingular, then the problem */
+/* > GLM is equivalent to the following weighted linear least squares */
+/* > problem */
+/* > */
+/* >              minimize || inv(B)*(d-A*x) ||_2 */
+/* >                  x */
+/* > */
+/* > where inv(B) denotes the inverse of B. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of rows of the matrices A and B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of columns of the matrix A.  0 <= M <= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] P */
+/* > \verbatim */
+/* >          P is INTEGER */
+/* >          The number of columns of the matrix B.  P >= N-M. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,M) */
+/* >          On entry, the N-by-M matrix A. */
+/* >          On exit, the upper triangular part of the array A contains */
+/* >          the M-by-M upper triangular matrix R. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,P) */
+/* >          On entry, the N-by-P matrix B. */
+/* >          On exit, if N <= P, the upper triangle of the subarray */
+/* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
+/* >          if N > P, the elements on and above the (N-P)th subdiagonal */
+/* >          contain the N-by-P upper trapezoidal matrix T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          On entry, D is the left hand side of the GLM equation. */
+/* >          On exit, D is destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (M) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] Y */
+/* > \verbatim */
+/* >          Y is REAL array, dimension (P) */
+/* > */
+/* >          On exit, X and Y are the solutions of the GLM problem. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= f2cmax(1,N+M+P). */
+/* >          For optimum performance, LWORK >= M+f2cmin(N,P)+f2cmax(N,P)*NB, */
+/* >          where NB is an upper bound for the optimal blocksizes for */
+/* >          SGEQRF, SGERQF, SORMQR and SORMRQ. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1:  the upper triangular factor R associated with A in the */
+/* >                generalized QR factorization of the pair (A, B) is */
+/* >                singular, so that rank(A) < M; the least squares */
+/* >                solution could not be computed. */
+/* >          = 2:  the bottom (N-M) by (N-M) part of the upper trapezoidal */
+/* >                factor T associated with B in the generalized QR */
+/* >                factorization of the pair (A, B) is singular, so that */
+/* >                rank( A B ) < N; the least squares solution could not */
+/* >                be computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realOTHEReigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int sggglm_(integer *n, integer *m, integer *p, real *a, 
+	integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, 
+	real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer lopt, i__;
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *);
+    integer nb, np;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, real *, real *, integer *
+	    , integer *);
+    integer lwkmin, nb1, nb2, nb3, nb4, lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *), sormrq_(char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, real *
+	    , integer *, real *, integer *, integer *), 
+	    strtrs_(char *, char *, char *, integer *, integer *, real *, 
+	    integer *, real *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  =================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --d__;
+    --x;
+    --y;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    np = f2cmin(*n,*p);
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*m < 0 || *m > *n) {
+	*info = -2;
+    } else if (*p < 0 || *p < *n - *m) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -7;
+    }
+
+/*     Calculate workspace */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    lwkmin = 1;
+	    lwkopt = 1;
+	} else {
+	    nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb2 = ilaenv_(&c__1, "SGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb3 = ilaenv_(&c__1, "SORMQR", " ", n, m, p, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	    nb4 = ilaenv_(&c__1, "SORMRQ", " ", n, m, p, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+/* Computing MAX */
+	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
+	    nb = f2cmax(i__1,nb4);
+	    lwkmin = *m + *n + *p;
+	    lwkopt = *m + np + f2cmax(*n,*p) * nb;
+	}
+	work[1] = (real) lwkopt;
+
+	if (*lwork < lwkmin && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGGLM", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    x[i__] = 0.f;
+	}
+	i__1 = *p;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    y[i__] = 0.f;
+	}
+	return 0;
+    }
+
+/*     Compute the GQR factorization of matrices A and B: */
+
+/*          Q**T*A = ( R11 ) M,    Q**T*B*Z**T = ( T11   T12 ) M */
+/*                   (  0  ) N-M                 (  0    T22 ) N-M */
+/*                      M                         M+P-N  N-M */
+
+/*     where R11 and T22 are upper triangular, and Q and Z are */
+/*     orthogonal. */
+
+    i__1 = *lwork - *m - np;
+    sggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m 
+	    + 1], &work[*m + np + 1], &i__1, info);
+    lopt = work[*m + np + 1];
+
+/*     Update left-hand-side vector d = Q**T*d = ( d1 ) M */
+/*                                               ( d2 ) N-M */
+
+    i__1 = f2cmax(1,*n);
+    i__2 = *lwork - *m - np;
+    sormqr_("Left", "Transpose", n, &c__1, m, &a[a_offset], lda, &work[1], &
+	    d__[1], &i__1, &work[*m + np + 1], &i__2, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[*m + np + 1];
+    lopt = f2cmax(i__1,i__2);
+
+/*     Solve T22*y2 = d2 for y2 */
+
+    if (*n > *m) {
+	i__1 = *n - *m;
+	i__2 = *n - *m;
+	strtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 
+		+ (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, 
+		info);
+
+	if (*info > 0) {
+	    *info = 1;
+	    return 0;
+	}
+
+	i__1 = *n - *m;
+	scopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);
+    }
+
+/*     Set y1 = 0 */
+
+    i__1 = *m + *p - *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	y[i__] = 0.f;
+/* L10: */
+    }
+
+/*     Update d1 = d1 - T12*y2 */
+
+    i__1 = *n - *m;
+    sgemv_("No transpose", m, &i__1, &c_b32, &b[(*m + *p - *n + 1) * b_dim1 + 
+	    1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b34, &d__[1], &c__1);
+
+/*     Solve triangular system: R11*x = d1 */
+
+    if (*m > 0) {
+	strtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], 
+		lda, &d__[1], m, info);
+
+	if (*info > 0) {
+	    *info = 2;
+	    return 0;
+	}
+
+/*        Copy D to X */
+
+	scopy_(m, &d__[1], &c__1, &x[1], &c__1);
+    }
+
+/*     Backward transformation y = Z**T *y */
+
+/* Computing MAX */
+    i__1 = 1, i__2 = *n - *p + 1;
+    i__3 = f2cmax(1,*p);
+    i__4 = *lwork - *m - np;
+    sormrq_("Left", "Transpose", p, &c__1, &np, &b[f2cmax(i__1,i__2) + b_dim1], 
+	    ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &i__4, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[*m + np + 1];
+    work[1] = (real) (*m + np + f2cmax(i__1,i__2));
+
+    return 0;
+
+/*     End of SGGGLM */
+
+} /* sggglm_ */
+
diff --git a/lapack-netlib/SRC/sgghd3.c b/lapack-netlib/SRC/sgghd3.c
new file mode 100644
index 000000000..78428304a
--- /dev/null
+++ b/lapack-netlib/SRC/sgghd3.c
@@ -0,0 +1,1452 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static real c_b14 = 0.f;
+static real c_b15 = 1.f;
+static integer c__2 = 2;
+static integer c__3 = 3;
+static integer c__16 = 16;
+
+/* > \brief \b SGGHD3 */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGHRD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgghd3.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgghd3.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgghd3.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */
+/*                          LDQ, Z, LDZ, WORK, LWORK, INFO ) */
+
+/*       CHARACTER          COMPQ, COMPZ */
+/*       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK */
+/*       REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
+/*      $                   Z( LDZ, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGHD3 reduces a pair of real matrices (A,B) to generalized upper */
+/* > Hessenberg form using orthogonal transformations, where A is a */
+/* > general matrix and B is upper triangular.  The form of the */
+/* > generalized eigenvalue problem is */
+/* >    A*x = lambda*B*x, */
+/* > and B is typically made upper triangular by computing its QR */
+/* > factorization and moving the orthogonal matrix Q to the left side */
+/* > of the equation. */
+/* > */
+/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */
+/* >    Q**T*A*Z = H */
+/* > and transforms B to another upper triangular matrix T: */
+/* >    Q**T*B*Z = T */
+/* > in order to reduce the problem to its standard form */
+/* >    H*y = lambda*T*y */
+/* > where y = Z**T*x. */
+/* > */
+/* > The orthogonal matrices Q and Z are determined as products of Givens */
+/* > rotations.  They may either be formed explicitly, or they may be */
+/* > postmultiplied into input matrices Q1 and Z1, so that */
+/* > */
+/* >      Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T */
+/* > */
+/* >      Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T */
+/* > */
+/* > If Q1 is the orthogonal matrix from the QR factorization of B in the */
+/* > original equation A*x = lambda*B*x, then SGGHD3 reduces the original */
+/* > problem to generalized Hessenberg form. */
+/* > */
+/* > This is a blocked variant of SGGHRD, using matrix-matrix */
+/* > multiplications for parts of the computation to enhance performance. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] COMPQ */
+/* > \verbatim */
+/* >          COMPQ is CHARACTER*1 */
+/* >          = 'N': do not compute Q; */
+/* >          = 'I': Q is initialized to the unit matrix, and the */
+/* >                 orthogonal matrix Q is returned; */
+/* >          = 'V': Q must contain an orthogonal matrix Q1 on entry, */
+/* >                 and the product Q1*Q is returned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COMPZ */
+/* > \verbatim */
+/* >          COMPZ is CHARACTER*1 */
+/* >          = 'N': do not compute Z; */
+/* >          = 'I': Z is initialized to the unit matrix, and the */
+/* >                 orthogonal matrix Z is returned; */
+/* >          = 'V': Z must contain an orthogonal matrix Z1 on entry, */
+/* >                 and the product Z1*Z is returned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A and B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* > */
+/* >          ILO and IHI mark the rows and columns of A which are to be */
+/* >          reduced.  It is assumed that A is already upper triangular */
+/* >          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are */
+/* >          normally set by a previous call to SGGBAL; otherwise they */
+/* >          should be set to 1 and N respectively. */
+/* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the N-by-N general matrix to be reduced. */
+/* >          On exit, the upper triangle and the first subdiagonal of A */
+/* >          are overwritten with the upper Hessenberg matrix H, and the */
+/* >          rest is set to zero. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the N-by-N upper triangular matrix B. */
+/* >          On exit, the upper triangular matrix T = Q**T B Z.  The */
+/* >          elements below the diagonal are set to zero. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Q */
+/* > \verbatim */
+/* >          Q is REAL array, dimension (LDQ, N) */
+/* >          On entry, if COMPQ = 'V', the orthogonal matrix Q1, */
+/* >          typically from the QR factorization of B. */
+/* >          On exit, if COMPQ='I', the orthogonal matrix Q, and if */
+/* >          COMPQ = 'V', the product Q1*Q. */
+/* >          Not referenced if COMPQ='N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDQ */
+/* > \verbatim */
+/* >          LDQ is INTEGER */
+/* >          The leading dimension of the array Q. */
+/* >          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Z */
+/* > \verbatim */
+/* >          Z is REAL array, dimension (LDZ, N) */
+/* >          On entry, if COMPZ = 'V', the orthogonal matrix Z1. */
+/* >          On exit, if COMPZ='I', the orthogonal matrix Z, and if */
+/* >          COMPZ = 'V', the product Z1*Z. */
+/* >          Not referenced if COMPZ='N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDZ */
+/* > \verbatim */
+/* >          LDZ is INTEGER */
+/* >          The leading dimension of the array Z. */
+/* >          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (LWORK) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in]  LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The length of the array WORK.  LWORK >= 1. */
+/* >          For optimum performance LWORK >= 6*N*NB, where NB is the */
+/* >          optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date January 2015 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  This routine reduces A to Hessenberg form and maintains B in */
+/* >  using a blocked variant of Moler and Stewart's original algorithm, */
+/* >  as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti */
+/* >  (BIT 2008). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgghd3_(char *compq, char *compz, integer *n, integer *
+	ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real 
+	*q, integer *ldq, real *z__, integer *ldz, real *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
+	    z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
+    real r__1;
+
+    /* Local variables */
+    logical blk22;
+    integer cola, jcol, ierr;
+    real temp;
+    integer jrow, topq, ppwo;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
+	    integer *, real *, real *);
+    real temp1, temp2, temp3, c__;
+    integer kacc22, i__, j, k;
+    real s;
+    extern logical lsame_(char *, char *);
+    integer nbmin;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), sgemv_(char *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *);
+    integer nblst;
+    logical initq;
+    real c1, c2;
+    extern /* Subroutine */ int sorm22_(char *, char *, integer *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , integer *, integer *);
+    logical wantq;
+    integer j0;
+    logical initz, wantz;
+    real s1, s2;
+    extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, 
+	    real *, integer *, real *, integer *);
+    char compq2[1], compz2[1];
+    integer nb, jj, nh, nx, pw;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, integer *, real *, integer *
+	    , real *, integer *, integer *), slaset_(char *, 
+	    integer *, integer *, real *, real *, real *, integer *), 
+	    slartg_(real *, real *, real *, real *, real *), slacpy_(char *, 
+	    integer *, integer *, real *, integer *, real *, integer *);
+    integer lwkopt;
+    logical lquery;
+    integer nnb, len, top, ppw, n2nb;
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     January 2015 */
+
+
+
+/*  ===================================================================== */
+
+
+/*     Decode and test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1 * 1;
+    q -= q_offset;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+/* Computing MAX */
+    i__1 = *n * 6 * nb;
+    lwkopt = f2cmax(i__1,1);
+    work[1] = (real) lwkopt;
+    initq = lsame_(compq, "I");
+    wantq = initq || lsame_(compq, "V");
+    initz = lsame_(compz, "I");
+    wantz = initz || lsame_(compz, "V");
+    lquery = *lwork == -1;
+
+    if (! lsame_(compq, "N") && ! wantq) {
+	*info = -1;
+    } else if (! lsame_(compz, "N") && ! wantz) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ilo < 1) {
+	*info = -4;
+    } else if (*ihi > *n || *ihi < *ilo - 1) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -9;
+    } else if (wantq && *ldq < *n || *ldq < 1) {
+	*info = -11;
+    } else if (wantz && *ldz < *n || *ldz < 1) {
+	*info = -13;
+    } else if (*lwork < 1 && ! lquery) {
+	*info = -15;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGHD3", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Initialize Q and Z if desired. */
+
+    if (initq) {
+	slaset_("All", n, n, &c_b14, &c_b15, &q[q_offset], ldq);
+    }
+    if (initz) {
+	slaset_("All", n, n, &c_b14, &c_b15, &z__[z_offset], ldz);
+    }
+
+/*     Zero out lower triangle of B. */
+
+    if (*n > 1) {
+	i__1 = *n - 1;
+	i__2 = *n - 1;
+	slaset_("Lower", &i__1, &i__2, &c_b14, &c_b14, &b[b_dim1 + 2], ldb);
+    }
+
+/*     Quick return if possible */
+
+    nh = *ihi - *ilo + 1;
+    if (nh <= 1) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+/*     Determine the blocksize. */
+
+    nbmin = ilaenv_(&c__2, "SGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    if (nb > 1 && nb < nh) {
+
+/*        Determine when to use unblocked instead of blocked code. */
+
+/* Computing MAX */
+	i__1 = nb, i__2 = ilaenv_(&c__3, "SGGHD3", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < nh) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    if (*lwork < lwkopt) {
+
+/*              Not enough workspace to use optimal NB:  determine the */
+/*              minimum value of NB, and reduce NB or force use of */
+/*              unblocked code. */
+
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGGHD3", " ", n, ilo, ihi, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+		if (*lwork >= *n * 6 * nbmin) {
+		    nb = *lwork / (*n * 6);
+		} else {
+		    nb = 1;
+		}
+	    }
+	}
+    }
+
+    if (nb < nbmin || nb >= nh) {
+
+/*        Use unblocked code below */
+
+	jcol = *ilo;
+
+    } else {
+
+/*        Use blocked code */
+
+	kacc22 = ilaenv_(&c__16, "SGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6,
+		 (ftnlen)1);
+	blk22 = kacc22 == 2;
+	i__1 = *ihi - 2;
+	i__2 = nb;
+	for (jcol = *ilo; i__2 < 0 ? jcol >= i__1 : jcol <= i__1; jcol += 
+		i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = *ihi - jcol - 1;
+	    nnb = f2cmin(i__3,i__4);
+
+/*           Initialize small orthogonal factors that will hold the */
+/*           accumulated Givens rotations in workspace. */
+/*           N2NB   denotes the number of 2*NNB-by-2*NNB factors */
+/*           NBLST  denotes the (possibly smaller) order of the last */
+/*                  factor. */
+
+	    n2nb = (*ihi - jcol - 1) / nnb - 1;
+	    nblst = *ihi - jcol - n2nb * nnb;
+	    slaset_("All", &nblst, &nblst, &c_b14, &c_b15, &work[1], &nblst);
+	    pw = nblst * nblst + 1;
+	    i__3 = n2nb;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		i__4 = nnb << 1;
+		i__5 = nnb << 1;
+		i__6 = nnb << 1;
+		slaset_("All", &i__4, &i__5, &c_b14, &c_b15, &work[pw], &i__6);
+		pw += (nnb << 2) * nnb;
+	    }
+
+/*           Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. */
+
+	    i__3 = jcol + nnb - 1;
+	    for (j = jcol; j <= i__3; ++j) {
+
+/*              Reduce Jth column of A. Store cosines and sines in Jth */
+/*              column of A and B, respectively. */
+
+		i__4 = j + 2;
+		for (i__ = *ihi; i__ >= i__4; --i__) {
+		    temp = a[i__ - 1 + j * a_dim1];
+		    slartg_(&temp, &a[i__ + j * a_dim1], &c__, &s, &a[i__ - 1 
+			    + j * a_dim1]);
+		    a[i__ + j * a_dim1] = c__;
+		    b[i__ + j * b_dim1] = s;
+		}
+
+/*              Accumulate Givens rotations into workspace array. */
+
+		ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1;
+		len = j + 2 - jcol;
+		jrow = j + n2nb * nnb + 2;
+		i__4 = jrow;
+		for (i__ = *ihi; i__ >= i__4; --i__) {
+		    c__ = a[i__ + j * a_dim1];
+		    s = b[i__ + j * b_dim1];
+		    i__5 = ppw + len - 1;
+		    for (jj = ppw; jj <= i__5; ++jj) {
+			temp = work[jj + nblst];
+			work[jj + nblst] = c__ * temp - s * work[jj];
+			work[jj] = s * temp + c__ * work[jj];
+		    }
+		    ++len;
+		    ppw = ppw - nblst - 1;
+		}
+
+		ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + nnb;
+		j0 = jrow - nnb;
+		i__4 = j + 2;
+		i__5 = -nnb;
+		for (jrow = j0; i__5 < 0 ? jrow >= i__4 : jrow <= i__4; jrow 
+			+= i__5) {
+		    ppw = ppwo;
+		    len = j + 2 - jcol;
+		    i__6 = jrow;
+		    for (i__ = jrow + nnb - 1; i__ >= i__6; --i__) {
+			c__ = a[i__ + j * a_dim1];
+			s = b[i__ + j * b_dim1];
+			i__7 = ppw + len - 1;
+			for (jj = ppw; jj <= i__7; ++jj) {
+			    temp = work[jj + (nnb << 1)];
+			    work[jj + (nnb << 1)] = c__ * temp - s * work[jj];
+			    work[jj] = s * temp + c__ * work[jj];
+			}
+			++len;
+			ppw = ppw - (nnb << 1) - 1;
+		    }
+		    ppwo += (nnb << 2) * nnb;
+		}
+
+/*              TOP denotes the number of top rows in A and B that will */
+/*              not be updated during the next steps. */
+
+		if (jcol <= 2) {
+		    top = 0;
+		} else {
+		    top = jcol;
+		}
+
+/*              Propagate transformations through B and replace stored */
+/*              left sines/cosines by right sines/cosines. */
+
+		i__5 = j + 1;
+		for (jj = *n; jj >= i__5; --jj) {
+
+/*                 Update JJth column of B. */
+
+/* Computing MIN */
+		    i__4 = jj + 1;
+		    i__6 = j + 2;
+		    for (i__ = f2cmin(i__4,*ihi); i__ >= i__6; --i__) {
+			c__ = a[i__ + j * a_dim1];
+			s = b[i__ + j * b_dim1];
+			temp = b[i__ + jj * b_dim1];
+			b[i__ + jj * b_dim1] = c__ * temp - s * b[i__ - 1 + 
+				jj * b_dim1];
+			b[i__ - 1 + jj * b_dim1] = s * temp + c__ * b[i__ - 1 
+				+ jj * b_dim1];
+		    }
+
+/*                 Annihilate B( JJ+1, JJ ). */
+
+		    if (jj < *ihi) {
+			temp = b[jj + 1 + (jj + 1) * b_dim1];
+			slartg_(&temp, &b[jj + 1 + jj * b_dim1], &c__, &s, &b[
+				jj + 1 + (jj + 1) * b_dim1]);
+			b[jj + 1 + jj * b_dim1] = 0.f;
+			i__6 = jj - top;
+			srot_(&i__6, &b[top + 1 + (jj + 1) * b_dim1], &c__1, &
+				b[top + 1 + jj * b_dim1], &c__1, &c__, &s);
+			a[jj + 1 + j * a_dim1] = c__;
+			b[jj + 1 + j * b_dim1] = -s;
+		    }
+		}
+
+/*              Update A by transformations from right. */
+/*              Explicit loop unrolling provides better performance */
+/*              compared to SLASR. */
+/*               CALL SLASR( 'Right', 'Variable', 'Backward', IHI-TOP, */
+/*     $                     IHI-J, A( J+2, J ), B( J+2, J ), */
+/*     $                     A( TOP+1, J+1 ), LDA ) */
+
+		jj = (*ihi - j - 1) % 3;
+		i__5 = jj + 1;
+		for (i__ = *ihi - j - 3; i__ >= i__5; i__ += -3) {
+		    c__ = a[j + 1 + i__ + j * a_dim1];
+		    s = -b[j + 1 + i__ + j * b_dim1];
+		    c1 = a[j + 2 + i__ + j * a_dim1];
+		    s1 = -b[j + 2 + i__ + j * b_dim1];
+		    c2 = a[j + 3 + i__ + j * a_dim1];
+		    s2 = -b[j + 3 + i__ + j * b_dim1];
+
+		    i__6 = *ihi;
+		    for (k = top + 1; k <= i__6; ++k) {
+			temp = a[k + (j + i__) * a_dim1];
+			temp1 = a[k + (j + i__ + 1) * a_dim1];
+			temp2 = a[k + (j + i__ + 2) * a_dim1];
+			temp3 = a[k + (j + i__ + 3) * a_dim1];
+			a[k + (j + i__ + 3) * a_dim1] = c2 * temp3 + s2 * 
+				temp2;
+			temp2 = -s2 * temp3 + c2 * temp2;
+			a[k + (j + i__ + 2) * a_dim1] = c1 * temp2 + s1 * 
+				temp1;
+			temp1 = -s1 * temp2 + c1 * temp1;
+			a[k + (j + i__ + 1) * a_dim1] = c__ * temp1 + s * 
+				temp;
+			a[k + (j + i__) * a_dim1] = -s * temp1 + c__ * temp;
+		    }
+		}
+
+		if (jj > 0) {
+		    for (i__ = jj; i__ >= 1; --i__) {
+			i__5 = *ihi - top;
+			r__1 = -b[j + 1 + i__ + j * b_dim1];
+			srot_(&i__5, &a[top + 1 + (j + i__ + 1) * a_dim1], &
+				c__1, &a[top + 1 + (j + i__) * a_dim1], &c__1,
+				 &a[j + 1 + i__ + j * a_dim1], &r__1);
+		    }
+		}
+
+/*              Update (J+1)th column of A by transformations from left. */
+
+		if (j < jcol + nnb - 1) {
+		    len = j + 1 - jcol;
+
+/*                 Multiply with the trailing accumulated orthogonal */
+/*                 matrix, which takes the form */
+
+/*                        [  U11  U12  ] */
+/*                    U = [            ], */
+/*                        [  U21  U22  ] */
+
+/*                 where U21 is a LEN-by-LEN matrix and U12 is lower */
+/*                 triangular. */
+
+		    jrow = *ihi - nblst + 1;
+		    sgemv_("Transpose", &nblst, &len, &c_b15, &work[1], &
+			    nblst, &a[jrow + (j + 1) * a_dim1], &c__1, &c_b14,
+			     &work[pw], &c__1);
+		    ppw = pw + len;
+		    i__5 = jrow + nblst - len - 1;
+		    for (i__ = jrow; i__ <= i__5; ++i__) {
+			work[ppw] = a[i__ + (j + 1) * a_dim1];
+			++ppw;
+		    }
+		    i__5 = nblst - len;
+		    strmv_("Lower", "Transpose", "Non-unit", &i__5, &work[len 
+			    * nblst + 1], &nblst, &work[pw + len], &c__1);
+		    i__5 = nblst - len;
+		    sgemv_("Transpose", &len, &i__5, &c_b15, &work[(len + 1) *
+			     nblst - len + 1], &nblst, &a[jrow + nblst - len 
+			    + (j + 1) * a_dim1], &c__1, &c_b15, &work[pw + 
+			    len], &c__1);
+		    ppw = pw;
+		    i__5 = jrow + nblst - 1;
+		    for (i__ = jrow; i__ <= i__5; ++i__) {
+			a[i__ + (j + 1) * a_dim1] = work[ppw];
+			++ppw;
+		    }
+
+/*                 Multiply with the other accumulated orthogonal */
+/*                 matrices, which take the form */
+
+/*                        [  U11  U12   0  ] */
+/*                        [                ] */
+/*                    U = [  U21  U22   0  ], */
+/*                        [                ] */
+/*                        [   0    0    I  ] */
+
+/*                 where I denotes the (NNB-LEN)-by-(NNB-LEN) identity */
+/*                 matrix, U21 is a LEN-by-LEN upper triangular matrix */
+/*                 and U12 is an NNB-by-NNB lower triangular matrix. */
+
+		    ppwo = nblst * nblst + 1;
+		    j0 = jrow - nnb;
+		    i__5 = jcol + 1;
+		    i__6 = -nnb;
+		    for (jrow = j0; i__6 < 0 ? jrow >= i__5 : jrow <= i__5; 
+			    jrow += i__6) {
+			ppw = pw + len;
+			i__4 = jrow + nnb - 1;
+			for (i__ = jrow; i__ <= i__4; ++i__) {
+			    work[ppw] = a[i__ + (j + 1) * a_dim1];
+			    ++ppw;
+			}
+			ppw = pw;
+			i__4 = jrow + nnb + len - 1;
+			for (i__ = jrow + nnb; i__ <= i__4; ++i__) {
+			    work[ppw] = a[i__ + (j + 1) * a_dim1];
+			    ++ppw;
+			}
+			i__4 = nnb << 1;
+			strmv_("Upper", "Transpose", "Non-unit", &len, &work[
+				ppwo + nnb], &i__4, &work[pw], &c__1);
+			i__4 = nnb << 1;
+			strmv_("Lower", "Transpose", "Non-unit", &nnb, &work[
+				ppwo + (len << 1) * nnb], &i__4, &work[pw + 
+				len], &c__1);
+			i__4 = nnb << 1;
+			sgemv_("Transpose", &nnb, &len, &c_b15, &work[ppwo], &
+				i__4, &a[jrow + (j + 1) * a_dim1], &c__1, &
+				c_b15, &work[pw], &c__1);
+			i__4 = nnb << 1;
+			sgemv_("Transpose", &len, &nnb, &c_b15, &work[ppwo + (
+				len << 1) * nnb + nnb], &i__4, &a[jrow + nnb 
+				+ (j + 1) * a_dim1], &c__1, &c_b15, &work[pw 
+				+ len], &c__1);
+			ppw = pw;
+			i__4 = jrow + len + nnb - 1;
+			for (i__ = jrow; i__ <= i__4; ++i__) {
+			    a[i__ + (j + 1) * a_dim1] = work[ppw];
+			    ++ppw;
+			}
+			ppwo += (nnb << 2) * nnb;
+		    }
+		}
+	    }
+
+/*           Apply accumulated orthogonal matrices to A. */
+
+	    cola = *n - jcol - nnb + 1;
+	    j = *ihi - nblst + 1;
+	    sgemm_("Transpose", "No Transpose", &nblst, &cola, &nblst, &c_b15,
+		     &work[1], &nblst, &a[j + (jcol + nnb) * a_dim1], lda, &
+		    c_b14, &work[pw], &nblst);
+	    slacpy_("All", &nblst, &cola, &work[pw], &nblst, &a[j + (jcol + 
+		    nnb) * a_dim1], lda);
+	    ppwo = nblst * nblst + 1;
+	    j0 = j - nnb;
+	    i__3 = jcol + 1;
+	    i__6 = -nnb;
+	    for (j = j0; i__6 < 0 ? j >= i__3 : j <= i__3; j += i__6) {
+		if (blk22) {
+
+/*                 Exploit the structure of */
+
+/*                        [  U11  U12  ] */
+/*                    U = [            ] */
+/*                        [  U21  U22  ], */
+
+/*                 where all blocks are NNB-by-NNB, U21 is upper */
+/*                 triangular and U12 is lower triangular. */
+
+		    i__5 = nnb << 1;
+		    i__4 = nnb << 1;
+		    i__7 = *lwork - pw + 1;
+		    sorm22_("Left", "Transpose", &i__5, &cola, &nnb, &nnb, &
+			    work[ppwo], &i__4, &a[j + (jcol + nnb) * a_dim1], 
+			    lda, &work[pw], &i__7, &ierr);
+		} else {
+
+/*                 Ignore the structure of U. */
+
+		    i__5 = nnb << 1;
+		    i__4 = nnb << 1;
+		    i__7 = nnb << 1;
+		    i__8 = nnb << 1;
+		    sgemm_("Transpose", "No Transpose", &i__5, &cola, &i__4, &
+			    c_b15, &work[ppwo], &i__7, &a[j + (jcol + nnb) * 
+			    a_dim1], lda, &c_b14, &work[pw], &i__8);
+		    i__5 = nnb << 1;
+		    i__4 = nnb << 1;
+		    slacpy_("All", &i__5, &cola, &work[pw], &i__4, &a[j + (
+			    jcol + nnb) * a_dim1], lda);
+		}
+		ppwo += (nnb << 2) * nnb;
+	    }
+
+/*           Apply accumulated orthogonal matrices to Q. */
+
+	    if (wantq) {
+		j = *ihi - nblst + 1;
+		if (initq) {
+/* Computing MAX */
+		    i__6 = 2, i__3 = j - jcol + 1;
+		    topq = f2cmax(i__6,i__3);
+		    nh = *ihi - topq + 1;
+		} else {
+		    topq = 1;
+		    nh = *n;
+		}
+		sgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, &
+			c_b15, &q[topq + j * q_dim1], ldq, &work[1], &nblst, &
+			c_b14, &work[pw], &nh);
+		slacpy_("All", &nh, &nblst, &work[pw], &nh, &q[topq + j * 
+			q_dim1], ldq);
+		ppwo = nblst * nblst + 1;
+		j0 = j - nnb;
+		i__6 = jcol + 1;
+		i__3 = -nnb;
+		for (j = j0; i__3 < 0 ? j >= i__6 : j <= i__6; j += i__3) {
+		    if (initq) {
+/* Computing MAX */
+			i__5 = 2, i__4 = j - jcol + 1;
+			topq = f2cmax(i__5,i__4);
+			nh = *ihi - topq + 1;
+		    }
+		    if (blk22) {
+
+/*                    Exploit the structure of U. */
+
+			i__5 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = *lwork - pw + 1;
+			sorm22_("Right", "No Transpose", &nh, &i__5, &nnb, &
+				nnb, &work[ppwo], &i__4, &q[topq + j * q_dim1]
+				, ldq, &work[pw], &i__7, &ierr);
+		    } else {
+
+/*                    Ignore the structure of U. */
+
+			i__5 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = nnb << 1;
+			sgemm_("No Transpose", "No Transpose", &nh, &i__5, &
+				i__4, &c_b15, &q[topq + j * q_dim1], ldq, &
+				work[ppwo], &i__7, &c_b14, &work[pw], &nh);
+			i__5 = nnb << 1;
+			slacpy_("All", &nh, &i__5, &work[pw], &nh, &q[topq + 
+				j * q_dim1], ldq);
+		    }
+		    ppwo += (nnb << 2) * nnb;
+		}
+	    }
+
+/*           Accumulate right Givens rotations if required. */
+
+	    if (wantz || top > 0) {
+
+/*              Initialize small orthogonal factors that will hold the */
+/*              accumulated Givens rotations in workspace. */
+
+		slaset_("All", &nblst, &nblst, &c_b14, &c_b15, &work[1], &
+			nblst);
+		pw = nblst * nblst + 1;
+		i__3 = n2nb;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    i__6 = nnb << 1;
+		    i__5 = nnb << 1;
+		    i__4 = nnb << 1;
+		    slaset_("All", &i__6, &i__5, &c_b14, &c_b15, &work[pw], &
+			    i__4);
+		    pw += (nnb << 2) * nnb;
+		}
+
+/*              Accumulate Givens rotations into workspace array. */
+
+		i__3 = jcol + nnb - 1;
+		for (j = jcol; j <= i__3; ++j) {
+		    ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1;
+		    len = j + 2 - jcol;
+		    jrow = j + n2nb * nnb + 2;
+		    i__6 = jrow;
+		    for (i__ = *ihi; i__ >= i__6; --i__) {
+			c__ = a[i__ + j * a_dim1];
+			a[i__ + j * a_dim1] = 0.f;
+			s = b[i__ + j * b_dim1];
+			b[i__ + j * b_dim1] = 0.f;
+			i__5 = ppw + len - 1;
+			for (jj = ppw; jj <= i__5; ++jj) {
+			    temp = work[jj + nblst];
+			    work[jj + nblst] = c__ * temp - s * work[jj];
+			    work[jj] = s * temp + c__ * work[jj];
+			}
+			++len;
+			ppw = ppw - nblst - 1;
+		    }
+
+		    ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + 
+			    nnb;
+		    j0 = jrow - nnb;
+		    i__6 = j + 2;
+		    i__5 = -nnb;
+		    for (jrow = j0; i__5 < 0 ? jrow >= i__6 : jrow <= i__6; 
+			    jrow += i__5) {
+			ppw = ppwo;
+			len = j + 2 - jcol;
+			i__4 = jrow;
+			for (i__ = jrow + nnb - 1; i__ >= i__4; --i__) {
+			    c__ = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = 0.f;
+			    s = b[i__ + j * b_dim1];
+			    b[i__ + j * b_dim1] = 0.f;
+			    i__7 = ppw + len - 1;
+			    for (jj = ppw; jj <= i__7; ++jj) {
+				temp = work[jj + (nnb << 1)];
+				work[jj + (nnb << 1)] = c__ * temp - s * work[
+					jj];
+				work[jj] = s * temp + c__ * work[jj];
+			    }
+			    ++len;
+			    ppw = ppw - (nnb << 1) - 1;
+			}
+			ppwo += (nnb << 2) * nnb;
+		    }
+		}
+	    } else {
+
+		i__3 = *ihi - jcol - 1;
+		slaset_("Lower", &i__3, &nnb, &c_b14, &c_b14, &a[jcol + 2 + 
+			jcol * a_dim1], lda);
+		i__3 = *ihi - jcol - 1;
+		slaset_("Lower", &i__3, &nnb, &c_b14, &c_b14, &b[jcol + 2 + 
+			jcol * b_dim1], ldb);
+	    }
+
+/*           Apply accumulated orthogonal matrices to A and B. */
+
+	    if (top > 0) {
+		j = *ihi - nblst + 1;
+		sgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, &
+			c_b15, &a[j * a_dim1 + 1], lda, &work[1], &nblst, &
+			c_b14, &work[pw], &top);
+		slacpy_("All", &top, &nblst, &work[pw], &top, &a[j * a_dim1 + 
+			1], lda);
+		ppwo = nblst * nblst + 1;
+		j0 = j - nnb;
+		i__3 = jcol + 1;
+		i__5 = -nnb;
+		for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) {
+		    if (blk22) {
+
+/*                    Exploit the structure of U. */
+
+			i__6 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = *lwork - pw + 1;
+			sorm22_("Right", "No Transpose", &top, &i__6, &nnb, &
+				nnb, &work[ppwo], &i__4, &a[j * a_dim1 + 1], 
+				lda, &work[pw], &i__7, &ierr);
+		    } else {
+
+/*                    Ignore the structure of U. */
+
+			i__6 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = nnb << 1;
+			sgemm_("No Transpose", "No Transpose", &top, &i__6, &
+				i__4, &c_b15, &a[j * a_dim1 + 1], lda, &work[
+				ppwo], &i__7, &c_b14, &work[pw], &top);
+			i__6 = nnb << 1;
+			slacpy_("All", &top, &i__6, &work[pw], &top, &a[j * 
+				a_dim1 + 1], lda);
+		    }
+		    ppwo += (nnb << 2) * nnb;
+		}
+
+		j = *ihi - nblst + 1;
+		sgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, &
+			c_b15, &b[j * b_dim1 + 1], ldb, &work[1], &nblst, &
+			c_b14, &work[pw], &top);
+		slacpy_("All", &top, &nblst, &work[pw], &top, &b[j * b_dim1 + 
+			1], ldb);
+		ppwo = nblst * nblst + 1;
+		j0 = j - nnb;
+		i__5 = jcol + 1;
+		i__3 = -nnb;
+		for (j = j0; i__3 < 0 ? j >= i__5 : j <= i__5; j += i__3) {
+		    if (blk22) {
+
+/*                    Exploit the structure of U. */
+
+			i__6 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = *lwork - pw + 1;
+			sorm22_("Right", "No Transpose", &top, &i__6, &nnb, &
+				nnb, &work[ppwo], &i__4, &b[j * b_dim1 + 1], 
+				ldb, &work[pw], &i__7, &ierr);
+		    } else {
+
+/*                    Ignore the structure of U. */
+
+			i__6 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = nnb << 1;
+			sgemm_("No Transpose", "No Transpose", &top, &i__6, &
+				i__4, &c_b15, &b[j * b_dim1 + 1], ldb, &work[
+				ppwo], &i__7, &c_b14, &work[pw], &top);
+			i__6 = nnb << 1;
+			slacpy_("All", &top, &i__6, &work[pw], &top, &b[j * 
+				b_dim1 + 1], ldb);
+		    }
+		    ppwo += (nnb << 2) * nnb;
+		}
+	    }
+
+/*           Apply accumulated orthogonal matrices to Z. */
+
+	    if (wantz) {
+		j = *ihi - nblst + 1;
+		if (initq) {
+/* Computing MAX */
+		    i__3 = 2, i__5 = j - jcol + 1;
+		    topq = f2cmax(i__3,i__5);
+		    nh = *ihi - topq + 1;
+		} else {
+		    topq = 1;
+		    nh = *n;
+		}
+		sgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, &
+			c_b15, &z__[topq + j * z_dim1], ldz, &work[1], &nblst,
+			 &c_b14, &work[pw], &nh);
+		slacpy_("All", &nh, &nblst, &work[pw], &nh, &z__[topq + j * 
+			z_dim1], ldz);
+		ppwo = nblst * nblst + 1;
+		j0 = j - nnb;
+		i__3 = jcol + 1;
+		i__5 = -nnb;
+		for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) {
+		    if (initq) {
+/* Computing MAX */
+			i__6 = 2, i__4 = j - jcol + 1;
+			topq = f2cmax(i__6,i__4);
+			nh = *ihi - topq + 1;
+		    }
+		    if (blk22) {
+
+/*                    Exploit the structure of U. */
+
+			i__6 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = *lwork - pw + 1;
+			sorm22_("Right", "No Transpose", &nh, &i__6, &nnb, &
+				nnb, &work[ppwo], &i__4, &z__[topq + j * 
+				z_dim1], ldz, &work[pw], &i__7, &ierr);
+		    } else {
+
+/*                    Ignore the structure of U. */
+
+			i__6 = nnb << 1;
+			i__4 = nnb << 1;
+			i__7 = nnb << 1;
+			sgemm_("No Transpose", "No Transpose", &nh, &i__6, &
+				i__4, &c_b15, &z__[topq + j * z_dim1], ldz, &
+				work[ppwo], &i__7, &c_b14, &work[pw], &nh);
+			i__6 = nnb << 1;
+			slacpy_("All", &nh, &i__6, &work[pw], &nh, &z__[topq 
+				+ j * z_dim1], ldz);
+		    }
+		    ppwo += (nnb << 2) * nnb;
+		}
+	    }
+	}
+    }
+
+/*     Use unblocked code to reduce the rest of the matrix */
+/*     Avoid re-initialization of modified Q and Z. */
+
+    *(unsigned char *)compq2 = *(unsigned char *)compq;
+    *(unsigned char *)compz2 = *(unsigned char *)compz;
+    if (jcol != *ilo) {
+	if (wantq) {
+	    *(unsigned char *)compq2 = 'V';
+	}
+	if (wantz) {
+	    *(unsigned char *)compz2 = 'V';
+	}
+    }
+
+    if (jcol < *ihi) {
+	sgghrd_(compq2, compz2, n, &jcol, ihi, &a[a_offset], lda, &b[b_offset]
+		, ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &ierr);
+    }
+    work[1] = (real) lwkopt;
+
+    return 0;
+
+/*     End of SGGHD3 */
+
+} /* sgghd3_ */
+
diff --git a/lapack-netlib/SRC/sgghrd.c b/lapack-netlib/SRC/sgghrd.c
new file mode 100644
index 000000000..6c705831f
--- /dev/null
+++ b/lapack-netlib/SRC/sgghrd.c
@@ -0,0 +1,784 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static real c_b10 = 0.f;
+static real c_b11 = 1.f;
+static integer c__1 = 1;
+
+/* > \brief \b SGGHRD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGHRD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgghrd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgghrd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgghrd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */
+/*                          LDQ, Z, LDZ, INFO ) */
+
+/*       CHARACTER          COMPQ, COMPZ */
+/*       INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N */
+/*       REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
+/*      $                   Z( LDZ, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGHRD reduces a pair of real matrices (A,B) to generalized upper */
+/* > Hessenberg form using orthogonal transformations, where A is a */
+/* > general matrix and B is upper triangular.  The form of the */
+/* > generalized eigenvalue problem is */
+/* >    A*x = lambda*B*x, */
+/* > and B is typically made upper triangular by computing its QR */
+/* > factorization and moving the orthogonal matrix Q to the left side */
+/* > of the equation. */
+/* > */
+/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */
+/* >    Q**T*A*Z = H */
+/* > and transforms B to another upper triangular matrix T: */
+/* >    Q**T*B*Z = T */
+/* > in order to reduce the problem to its standard form */
+/* >    H*y = lambda*T*y */
+/* > where y = Z**T*x. */
+/* > */
+/* > The orthogonal matrices Q and Z are determined as products of Givens */
+/* > rotations.  They may either be formed explicitly, or they may be */
+/* > postmultiplied into input matrices Q1 and Z1, so that */
+/* > */
+/* >      Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T */
+/* > */
+/* >      Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T */
+/* > */
+/* > If Q1 is the orthogonal matrix from the QR factorization of B in the */
+/* > original equation A*x = lambda*B*x, then SGGHRD reduces the original */
+/* > problem to generalized Hessenberg form. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] COMPQ */
+/* > \verbatim */
+/* >          COMPQ is CHARACTER*1 */
+/* >          = 'N': do not compute Q; */
+/* >          = 'I': Q is initialized to the unit matrix, and the */
+/* >                 orthogonal matrix Q is returned; */
+/* >          = 'V': Q must contain an orthogonal matrix Q1 on entry, */
+/* >                 and the product Q1*Q is returned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COMPZ */
+/* > \verbatim */
+/* >          COMPZ is CHARACTER*1 */
+/* >          = 'N': do not compute Z; */
+/* >          = 'I': Z is initialized to the unit matrix, and the */
+/* >                 orthogonal matrix Z is returned; */
+/* >          = 'V': Z must contain an orthogonal matrix Z1 on entry, */
+/* >                 and the product Z1*Z is returned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices A and B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* > */
+/* >          ILO and IHI mark the rows and columns of A which are to be */
+/* >          reduced.  It is assumed that A is already upper triangular */
+/* >          in rows and columns 1:ILO-1 and IHI+1:N.  ILO and IHI are */
+/* >          normally set by a previous call to SGGBAL; otherwise they */
+/* >          should be set to 1 and N respectively. */
+/* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA, N) */
+/* >          On entry, the N-by-N general matrix to be reduced. */
+/* >          On exit, the upper triangle and the first subdiagonal of A */
+/* >          are overwritten with the upper Hessenberg matrix H, and the */
+/* >          rest is set to zero. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB, N) */
+/* >          On entry, the N-by-N upper triangular matrix B. */
+/* >          On exit, the upper triangular matrix T = Q**T B Z.  The */
+/* >          elements below the diagonal are set to zero. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Q */
+/* > \verbatim */
+/* >          Q is REAL array, dimension (LDQ, N) */
+/* >          On entry, if COMPQ = 'V', the orthogonal matrix Q1, */
+/* >          typically from the QR factorization of B. */
+/* >          On exit, if COMPQ='I', the orthogonal matrix Q, and if */
+/* >          COMPQ = 'V', the product Q1*Q. */
+/* >          Not referenced if COMPQ='N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDQ */
+/* > \verbatim */
+/* >          LDQ is INTEGER */
+/* >          The leading dimension of the array Q. */
+/* >          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Z */
+/* > \verbatim */
+/* >          Z is REAL array, dimension (LDZ, N) */
+/* >          On entry, if COMPZ = 'V', the orthogonal matrix Z1. */
+/* >          On exit, if COMPZ='I', the orthogonal matrix Z, and if */
+/* >          COMPZ = 'V', the product Z1*Z. */
+/* >          Not referenced if COMPZ='N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDZ */
+/* > \verbatim */
+/* >          LDZ is INTEGER */
+/* >          The leading dimension of the array Z. */
+/* >          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  This routine reduces A to Hessenberg and B to triangular form by */
+/* >  an unblocked reduction, as described in _Matrix_Computations_, */
+/* >  by Golub and Van Loan (Johns Hopkins Press.) */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sgghrd_(char *compq, char *compz, integer *n, integer *
+	ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real 
+	*q, integer *ldq, real *z__, integer *ldz, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
+	    z_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer jcol;
+    real temp;
+    integer jrow;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
+	    integer *, real *, real *);
+    real c__, s;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    integer icompq;
+    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, 
+	    real *, real *, integer *), slartg_(real *, real *, real *
+	    , real *, real *);
+    integer icompz;
+    logical ilq, ilz;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode COMPQ */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1 * 1;
+    q -= q_offset;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+
+    /* Function Body */
+    if (lsame_(compq, "N")) {
+	ilq = FALSE_;
+	icompq = 1;
+    } else if (lsame_(compq, "V")) {
+	ilq = TRUE_;
+	icompq = 2;
+    } else if (lsame_(compq, "I")) {
+	ilq = TRUE_;
+	icompq = 3;
+    } else {
+	icompq = 0;
+    }
+
+/*     Decode COMPZ */
+
+    if (lsame_(compz, "N")) {
+	ilz = FALSE_;
+	icompz = 1;
+    } else if (lsame_(compz, "V")) {
+	ilz = TRUE_;
+	icompz = 2;
+    } else if (lsame_(compz, "I")) {
+	ilz = TRUE_;
+	icompz = 3;
+    } else {
+	icompz = 0;
+    }
+
+/*     Test the input parameters. */
+
+    *info = 0;
+    if (icompq <= 0) {
+	*info = -1;
+    } else if (icompz <= 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ilo < 1) {
+	*info = -4;
+    } else if (*ihi > *n || *ihi < *ilo - 1) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -9;
+    } else if (ilq && *ldq < *n || *ldq < 1) {
+	*info = -11;
+    } else if (ilz && *ldz < *n || *ldz < 1) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGHRD", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Initialize Q and Z if desired. */
+
+    if (icompq == 3) {
+	slaset_("Full", n, n, &c_b10, &c_b11, &q[q_offset], ldq);
+    }
+    if (icompz == 3) {
+	slaset_("Full", n, n, &c_b10, &c_b11, &z__[z_offset], ldz);
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 1) {
+	return 0;
+    }
+
+/*     Zero out lower triangle of B */
+
+    i__1 = *n - 1;
+    for (jcol = 1; jcol <= i__1; ++jcol) {
+	i__2 = *n;
+	for (jrow = jcol + 1; jrow <= i__2; ++jrow) {
+	    b[jrow + jcol * b_dim1] = 0.f;
+/* L10: */
+	}
+/* L20: */
+    }
+
+/*     Reduce A and B */
+
+    i__1 = *ihi - 2;
+    for (jcol = *ilo; jcol <= i__1; ++jcol) {
+
+	i__2 = jcol + 2;
+	for (jrow = *ihi; jrow >= i__2; --jrow) {
+
+/*           Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */
+
+	    temp = a[jrow - 1 + jcol * a_dim1];
+	    slartg_(&temp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + 
+		    jcol * a_dim1]);
+	    a[jrow + jcol * a_dim1] = 0.f;
+	    i__3 = *n - jcol;
+	    srot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + (
+		    jcol + 1) * a_dim1], lda, &c__, &s);
+	    i__3 = *n + 2 - jrow;
+	    srot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + (
+		    jrow - 1) * b_dim1], ldb, &c__, &s);
+	    if (ilq) {
+		srot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 
+			+ 1], &c__1, &c__, &s);
+	    }
+
+/*           Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */
+
+	    temp = b[jrow + jrow * b_dim1];
+	    slartg_(&temp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow 
+		    + jrow * b_dim1]);
+	    b[jrow + (jrow - 1) * b_dim1] = 0.f;
+	    srot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + 
+		    1], &c__1, &c__, &s);
+	    i__3 = jrow - 1;
+	    srot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 
+		    + 1], &c__1, &c__, &s);
+	    if (ilz) {
+		srot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * 
+			z_dim1 + 1], &c__1, &c__, &s);
+	    }
+/* L30: */
+	}
+/* L40: */
+    }
+
+    return 0;
+
+/*     End of SGGHRD */
+
+} /* sgghrd_ */
+
diff --git a/lapack-netlib/SRC/sgglse.c b/lapack-netlib/SRC/sgglse.c
new file mode 100644
index 000000000..bf637879d
--- /dev/null
+++ b/lapack-netlib/SRC/sgglse.c
@@ -0,0 +1,786 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static real c_b31 = -1.f;
+static real c_b33 = 1.f;
+
+/* > \brief <b> SGGLSE solves overdetermined or underdetermined systems for OTHER matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGLSE + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgglse.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgglse.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgglse.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, */
+/*                          INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
+/*       REAL               A( LDA, * ), B( LDB, * ), C( * ), D( * ), */
+/*      $                   WORK( * ), X( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGLSE solves the linear equality-constrained least squares (LSE) */
+/* > problem: */
+/* > */
+/* >         minimize || c - A*x ||_2   subject to   B*x = d */
+/* > */
+/* > where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */
+/* > M-vector, and d is a given P-vector. It is assumed that */
+/* > P <= N <= M+P, and */
+/* > */
+/* >          rank(B) = P and  rank( (A) ) = N. */
+/* >                               ( (B) ) */
+/* > */
+/* > These conditions ensure that the LSE problem has a unique solution, */
+/* > which is obtained using a generalized RQ factorization of the */
+/* > matrices (B, A) given by */
+/* > */
+/* >    B = (0 R)*Q,   A = Z*T*Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrices A and B. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] P */
+/* > \verbatim */
+/* >          P is INTEGER */
+/* >          The number of rows of the matrix B. 0 <= P <= N <= M+P. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,N) */
+/* >          On entry, the P-by-N matrix B. */
+/* >          On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */
+/* >          contains the P-by-P upper triangular matrix R. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (M) */
+/* >          On entry, C contains the right hand side vector for the */
+/* >          least squares part of the LSE problem. */
+/* >          On exit, the residual sum of squares for the solution */
+/* >          is given by the sum of squares of elements N-P+1 to M of */
+/* >          vector C. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (P) */
+/* >          On entry, D contains the right hand side vector for the */
+/* >          constrained equation. */
+/* >          On exit, D is destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (N) */
+/* >          On exit, X is the solution of the LSE problem. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= f2cmax(1,M+N+P). */
+/* >          For optimum performance LWORK >= P+f2cmin(M,N)+f2cmax(M,N)*NB, */
+/* >          where NB is an upper bound for the optimal blocksizes for */
+/* >          SGEQRF, SGERQF, SORMQR and SORMRQ. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          = 1:  the upper triangular factor R associated with B in the */
+/* >                generalized RQ factorization of the pair (B, A) is */
+/* >                singular, so that rank(B) < P; the least squares */
+/* >                solution could not be computed. */
+/* >          = 2:  the (N-P) by (N-P) part of the upper trapezoidal factor */
+/* >                T associated with A in the generalized RQ factorization */
+/* >                of the pair (B, A) is singular, so that */
+/* >                rank( (A) ) < N; the least squares solution could not */
+/* >                    ( (B) ) */
+/* >                be computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realOTHERsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgglse_(integer *m, integer *n, integer *p, real *a, 
+	integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, 
+	real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    integer lopt;
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), 
+	    saxpy_(integer *, real *, real *, integer *, real *, integer *), 
+	    strmv_(char *, char *, char *, integer *, real *, integer *, real 
+	    *, integer *);
+    integer nb, mn, nr;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sggrqf_(integer *, integer *, integer *, real 
+	    *, integer *, real *, real *, integer *, real *, real *, integer *
+	    , integer *);
+    integer lwkmin, nb1, nb2, nb3, nb4, lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *), sormrq_(char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, real *
+	    , integer *, real *, integer *, integer *), 
+	    strtrs_(char *, char *, char *, integer *, integer *, real *, 
+	    integer *, real *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --c__;
+    --d__;
+    --x;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    mn = f2cmin(*m,*n);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*p < 0 || *p > *n || *p < *n - *m) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,*p)) {
+	*info = -7;
+    }
+
+/*     Calculate workspace */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    lwkmin = 1;
+	    lwkopt = 1;
+	} else {
+	    nb1 = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb2 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb3 = ilaenv_(&c__1, "SORMQR", " ", m, n, p, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	    nb4 = ilaenv_(&c__1, "SORMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+/* Computing MAX */
+	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
+	    nb = f2cmax(i__1,nb4);
+	    lwkmin = *m + *n + *p;
+	    lwkopt = *p + mn + f2cmax(*m,*n) * nb;
+	}
+	work[1] = (real) lwkopt;
+
+	if (*lwork < lwkmin && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGLSE", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Compute the GRQ factorization of matrices B and A: */
+
+/*            B*Q**T = (  0  T12 ) P   Z**T*A*Q**T = ( R11 R12 ) N-P */
+/*                        N-P  P                     (  0  R22 ) M+P-N */
+/*                                                      N-P  P */
+
+/*     where T12 and R11 are upper triangular, and Q and Z are */
+/*     orthogonal. */
+
+    i__1 = *lwork - *p - mn;
+    sggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p 
+	    + 1], &work[*p + mn + 1], &i__1, info);
+    lopt = work[*p + mn + 1];
+
+/*     Update c = Z**T *c = ( c1 ) N-P */
+/*                          ( c2 ) M+P-N */
+
+    i__1 = f2cmax(1,*m);
+    i__2 = *lwork - *p - mn;
+    sormqr_("Left", "Transpose", m, &c__1, &mn, &a[a_offset], lda, &work[*p + 
+	    1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[*p + mn + 1];
+    lopt = f2cmax(i__1,i__2);
+
+/*     Solve T12*x2 = d for x2 */
+
+    if (*p > 0) {
+	strtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + 
+		1) * b_dim1 + 1], ldb, &d__[1], p, info);
+
+	if (*info > 0) {
+	    *info = 1;
+	    return 0;
+	}
+
+/*        Put the solution in X */
+
+	scopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1);
+
+/*        Update c1 */
+
+	i__1 = *n - *p;
+	sgemv_("No transpose", &i__1, p, &c_b31, &a[(*n - *p + 1) * a_dim1 + 
+		1], lda, &d__[1], &c__1, &c_b33, &c__[1], &c__1);
+    }
+
+/*     Solve R11*x1 = c1 for x1 */
+
+    if (*n > *p) {
+	i__1 = *n - *p;
+	i__2 = *n - *p;
+	strtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[
+		a_offset], lda, &c__[1], &i__2, info);
+
+	if (*info > 0) {
+	    *info = 2;
+	    return 0;
+	}
+
+/*        Put the solutions in X */
+
+	i__1 = *n - *p;
+	scopy_(&i__1, &c__[1], &c__1, &x[1], &c__1);
+    }
+
+/*     Compute the residual vector: */
+
+    if (*m < *n) {
+	nr = *m + *p - *n;
+	if (nr > 0) {
+	    i__1 = *n - *m;
+	    sgemv_("No transpose", &nr, &i__1, &c_b31, &a[*n - *p + 1 + (*m + 
+		    1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b33, &c__[*n - 
+		    *p + 1], &c__1);
+	}
+    } else {
+	nr = *p;
+    }
+    if (nr > 0) {
+	strmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n 
+		- *p + 1) * a_dim1], lda, &d__[1], &c__1);
+	saxpy_(&nr, &c_b31, &d__[1], &c__1, &c__[*n - *p + 1], &c__1);
+    }
+
+/*     Backward transformation x = Q**T*x */
+
+    i__1 = *lwork - *p - mn;
+    sormrq_("Left", "Transpose", n, &c__1, p, &b[b_offset], ldb, &work[1], &x[
+	    1], n, &work[*p + mn + 1], &i__1, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[*p + mn + 1];
+    work[1] = (real) (*p + mn + f2cmax(i__1,i__2));
+
+    return 0;
+
+/*     End of SGGLSE */
+
+} /* sgglse_ */
+
diff --git a/lapack-netlib/SRC/sggqrf.c b/lapack-netlib/SRC/sggqrf.c
new file mode 100644
index 000000000..d922cf90f
--- /dev/null
+++ b/lapack-netlib/SRC/sggqrf.c
@@ -0,0 +1,718 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+
+/* > \brief \b SGGQRF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGQRF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggqrf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggqrf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggqrf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, */
+/*                          LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
+/*       REAL               A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
+/*      $                   WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGQRF computes a generalized QR factorization of an N-by-M matrix A */
+/* > and an N-by-P matrix B: */
+/* > */
+/* >             A = Q*R,        B = Q*T*Z, */
+/* > */
+/* > where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */
+/* > matrix, and R and T assume one of the forms: */
+/* > */
+/* > if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N, */
+/* >                 (  0  ) N-M                         N   M-N */
+/* >                    M */
+/* > */
+/* > where R11 is upper triangular, and */
+/* > */
+/* > if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P, */
+/* >                  P-N  N                           ( T21 ) P */
+/* >                                                      P */
+/* > */
+/* > where T12 or T21 is upper triangular. */
+/* > */
+/* > In particular, if B is square and nonsingular, the GQR factorization */
+/* > of A and B implicitly gives the QR factorization of inv(B)*A: */
+/* > */
+/* >              inv(B)*A = Z**T*(inv(T)*R) */
+/* > */
+/* > where inv(B) denotes the inverse of the matrix B, and Z**T denotes the */
+/* > transpose of the matrix Z. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of rows of the matrices A and B. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of columns of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] P */
+/* > \verbatim */
+/* >          P is INTEGER */
+/* >          The number of columns of the matrix B.  P >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,M) */
+/* >          On entry, the N-by-M matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(N,M)-by-M upper trapezoidal matrix R (R is */
+/* >          upper triangular if N >= M); the elements below the diagonal, */
+/* >          with the array TAUA, represent the orthogonal matrix Q as a */
+/* >          product of f2cmin(N,M) elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUA */
+/* > \verbatim */
+/* >          TAUA is REAL array, dimension (f2cmin(N,M)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Q (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,P) */
+/* >          On entry, the N-by-P matrix B. */
+/* >          On exit, if N <= P, the upper triangle of the subarray */
+/* >          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
+/* >          if N > P, the elements on and above the (N-P)-th subdiagonal */
+/* >          contain the N-by-P upper trapezoidal matrix T; the remaining */
+/* >          elements, with the array TAUB, represent the orthogonal */
+/* >          matrix Z as a product of elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUB */
+/* > \verbatim */
+/* >          TAUB is REAL array, dimension (f2cmin(N,P)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Z (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
+/* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
+/* >          where NB1 is the optimal blocksize for the QR factorization */
+/* >          of an N-by-M matrix, NB2 is the optimal blocksize for the */
+/* >          RQ factorization of an N-by-P matrix, and NB3 is the optimal */
+/* >          blocksize for a call of SORMQR. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(n,m). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - taua * v * v**T */
+/* > */
+/* >  where taua is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */
+/* >  and taua in TAUA(i). */
+/* >  To form Q explicitly, use LAPACK subroutine SORGQR. */
+/* >  To use Q to update another matrix, use LAPACK subroutine SORMQR. */
+/* > */
+/* >  The matrix Z is represented as a product of elementary reflectors */
+/* > */
+/* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(n,p). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - taub * v * v**T */
+/* > */
+/* >  where taub is a real scalar, and v is a real vector with */
+/* >  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */
+/* >  B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */
+/* >  To form Z explicitly, use LAPACK subroutine SORGRQ. */
+/* >  To use Z to update another matrix, use LAPACK subroutine SORMRQ. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggqrf_(integer *n, integer *m, integer *p, real *a, 
+	integer *lda, real *taua, real *b, integer *ldb, real *taub, real *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    integer lopt, nb;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), sgerqf_(integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, integer *
+	    );
+    integer nb1, nb2, nb3, lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --taua;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --taub;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    nb2 = ilaenv_(&c__1, "SGERQF", " ", n, p, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    nb3 = ilaenv_(&c__1, "SORMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+    i__1 = f2cmax(nb1,nb2);
+    nb = f2cmax(i__1,nb3);
+/* Computing MAX */
+    i__1 = f2cmax(*n,*m);
+    lwkopt = f2cmax(i__1,*p) * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*p < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -8;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*n), i__1 = f2cmax(i__1,*m);
+	if (*lwork < f2cmax(i__1,*p) && ! lquery) {
+	    *info = -11;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGQRF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     QR factorization of N-by-M matrix A: A = Q*R */
+
+    sgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info);
+    lopt = work[1];
+
+/*     Update B := Q**T*B. */
+
+    i__1 = f2cmin(*n,*m);
+    sormqr_("Left", "Transpose", n, p, &i__1, &a[a_offset], lda, &taua[1], &b[
+	    b_offset], ldb, &work[1], lwork, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[1];
+    lopt = f2cmax(i__1,i__2);
+
+/*     RQ factorization of N-by-P matrix B: B = T*Z. */
+
+    sgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[1];
+    work[1] = (real) f2cmax(i__1,i__2);
+
+    return 0;
+
+/*     End of SGGQRF */
+
+} /* sggqrf_ */
+
diff --git a/lapack-netlib/SRC/sggrqf.c b/lapack-netlib/SRC/sggrqf.c
new file mode 100644
index 000000000..5780cfdb1
--- /dev/null
+++ b/lapack-netlib/SRC/sggrqf.c
@@ -0,0 +1,719 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+
+/* > \brief \b SGGRQF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGRQF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggrqf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggrqf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggrqf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, */
+/*                          LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, P */
+/*       REAL               A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */
+/*      $                   WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGRQF computes a generalized RQ factorization of an M-by-N matrix A */
+/* > and a P-by-N matrix B: */
+/* > */
+/* >             A = R*Q,        B = Z*T*Q, */
+/* > */
+/* > where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */
+/* > matrix, and R and T assume one of the forms: */
+/* > */
+/* > if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N, */
+/* >                  N-M  M                           ( R21 ) N */
+/* >                                                      N */
+/* > */
+/* > where R12 or R21 is upper triangular, and */
+/* > */
+/* > if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P, */
+/* >                 (  0  ) P-N                         P   N-P */
+/* >                    N */
+/* > */
+/* > where T11 is upper triangular. */
+/* > */
+/* > In particular, if B is square and nonsingular, the GRQ factorization */
+/* > of A and B implicitly gives the RQ factorization of A*inv(B): */
+/* > */
+/* >              A*inv(B) = (R*inv(T))*Z**T */
+/* > */
+/* > where inv(B) denotes the inverse of the matrix B, and Z**T denotes the */
+/* > transpose of the matrix Z. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] P */
+/* > \verbatim */
+/* >          P is INTEGER */
+/* >          The number of rows of the matrix B.  P >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrices A and B. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, if M <= N, the upper triangle of the subarray */
+/* >          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */
+/* >          if M > N, the elements on and above the (M-N)-th subdiagonal */
+/* >          contain the M-by-N upper trapezoidal matrix R; the remaining */
+/* >          elements, with the array TAUA, represent the orthogonal */
+/* >          matrix Q as a product of elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUA */
+/* > \verbatim */
+/* >          TAUA is REAL array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Q (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,N) */
+/* >          On entry, the P-by-N matrix B. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(P,N)-by-N upper trapezoidal matrix T (T is */
+/* >          upper triangular if P >= N); the elements below the diagonal, */
+/* >          with the array TAUB, represent the orthogonal matrix Z as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUB */
+/* > \verbatim */
+/* >          TAUB is REAL array, dimension (f2cmin(P,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Z (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */
+/* >          For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */
+/* >          where NB1 is the optimal blocksize for the RQ factorization */
+/* >          of an M-by-N matrix, NB2 is the optimal blocksize for the */
+/* >          QR factorization of a P-by-N matrix, and NB3 is the optimal */
+/* >          blocksize for a call of SORMRQ. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INF0= -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - taua * v * v**T */
+/* > */
+/* >  where taua is a real scalar, and v is a real vector with */
+/* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
+/* >  A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */
+/* >  To form Q explicitly, use LAPACK subroutine SORGRQ. */
+/* >  To use Q to update another matrix, use LAPACK subroutine SORMRQ. */
+/* > */
+/* >  The matrix Z is represented as a product of elementary reflectors */
+/* > */
+/* >     Z = H(1) H(2) . . . H(k), where k = f2cmin(p,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - taub * v * v**T */
+/* > */
+/* >  where taub is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */
+/* >  and taub in TAUB(i). */
+/* >  To form Z explicitly, use LAPACK subroutine SORGQR. */
+/* >  To use Z to update another matrix, use LAPACK subroutine SORMQR. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggrqf_(integer *m, integer *p, integer *n, real *a, 
+	integer *lda, real *taua, real *b, integer *ldb, real *taub, real *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer lopt, nb;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
+	    *, real *, real *, integer *, integer *), sgerqf_(integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, integer *
+	    );
+    integer nb1, nb2, nb3, lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sormrq_(char *, char *, integer *, integer *, 
+	    integer *, real *, integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --taua;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --taub;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb1 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    nb2 = ilaenv_(&c__1, "SGEQRF", " ", p, n, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    nb3 = ilaenv_(&c__1, "SORMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+    i__1 = f2cmax(nb1,nb2);
+    nb = f2cmax(i__1,nb3);
+/* Computing MAX */
+    i__1 = f2cmax(*n,*m);
+    lwkopt = f2cmax(i__1,*p) * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*p < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,*p)) {
+	*info = -8;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m), i__1 = f2cmax(i__1,*p);
+	if (*lwork < f2cmax(i__1,*n) && ! lquery) {
+	    *info = -11;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGRQF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     RQ factorization of M-by-N matrix A: A = R*Q */
+
+    sgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info);
+    lopt = work[1];
+
+/*     Update B := B*Q**T */
+
+    i__1 = f2cmin(*m,*n);
+/* Computing MAX */
+    i__2 = 1, i__3 = *m - *n + 1;
+    sormrq_("Right", "Transpose", p, n, &i__1, &a[f2cmax(i__2,i__3) + a_dim1], 
+	    lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[1];
+    lopt = f2cmax(i__1,i__2);
+
+/*     QR factorization of P-by-N matrix B: B = Z*T */
+
+    sgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info);
+/* Computing MAX */
+    i__1 = lopt, i__2 = (integer) work[1];
+    work[1] = (real) f2cmax(i__1,i__2);
+
+    return 0;
+
+/*     End of SGGRQF */
+
+} /* sggrqf_ */
+
diff --git a/lapack-netlib/SRC/sggsvd3.c b/lapack-netlib/SRC/sggsvd3.c
new file mode 100644
index 000000000..081ba6fac
--- /dev/null
+++ b/lapack-netlib/SRC/sggsvd3.c
@@ -0,0 +1,936 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static integer c__1 = 1;
+
+/* > \brief <b> SGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGSVD3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggsvd3
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggsvd3
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggsvd3
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, */
+/*                           LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, */
+/*                           LWORK, IWORK, INFO ) */
+
+/*       CHARACTER          JOBQ, JOBU, JOBV */
+/*       INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */
+/*       INTEGER            IWORK( * ) */
+/*       REAL               A( LDA, * ), ALPHA( * ), B( LDB, * ), */
+/*      $                   BETA( * ), Q( LDQ, * ), U( LDU, * ), */
+/*      $                   V( LDV, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGSVD3 computes the generalized singular value decomposition (GSVD) */
+/* > of an M-by-N real matrix A and P-by-N real matrix B: */
+/* > */
+/* >       U**T*A*Q = D1*( 0 R ),    V**T*B*Q = D2*( 0 R ) */
+/* > */
+/* > where U, V and Q are orthogonal matrices. */
+/* > Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T, */
+/* > then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and */
+/* > D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the */
+/* > following structures, respectively: */
+/* > */
+/* > If M-K-L >= 0, */
+/* > */
+/* >                     K  L */
+/* >        D1 =     K ( I  0 ) */
+/* >                 L ( 0  C ) */
+/* >             M-K-L ( 0  0 ) */
+/* > */
+/* >                   K  L */
+/* >        D2 =   L ( 0  S ) */
+/* >             P-L ( 0  0 ) */
+/* > */
+/* >                 N-K-L  K    L */
+/* >   ( 0 R ) = K (  0   R11  R12 ) */
+/* >             L (  0    0   R22 ) */
+/* > */
+/* > where */
+/* > */
+/* >   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */
+/* >   S = diag( BETA(K+1),  ... , BETA(K+L) ), */
+/* >   C**2 + S**2 = I. */
+/* > */
+/* >   R is stored in A(1:K+L,N-K-L+1:N) on exit. */
+/* > */
+/* > If M-K-L < 0, */
+/* > */
+/* >                   K M-K K+L-M */
+/* >        D1 =   K ( I  0    0   ) */
+/* >             M-K ( 0  C    0   ) */
+/* > */
+/* >                     K M-K K+L-M */
+/* >        D2 =   M-K ( 0  S    0  ) */
+/* >             K+L-M ( 0  0    I  ) */
+/* >               P-L ( 0  0    0  ) */
+/* > */
+/* >                    N-K-L  K   M-K  K+L-M */
+/* >   ( 0 R ) =     K ( 0    R11  R12  R13  ) */
+/* >               M-K ( 0     0   R22  R23  ) */
+/* >             K+L-M ( 0     0    0   R33  ) */
+/* > */
+/* > where */
+/* > */
+/* >   C = diag( ALPHA(K+1), ... , ALPHA(M) ), */
+/* >   S = diag( BETA(K+1),  ... , BETA(M) ), */
+/* >   C**2 + S**2 = I. */
+/* > */
+/* >   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */
+/* >   ( 0  R22 R23 ) */
+/* >   in B(M-K+1:L,N+M-K-L+1:N) on exit. */
+/* > */
+/* > The routine computes C, S, R, and optionally the orthogonal */
+/* > transformation matrices U, V and Q. */
+/* > */
+/* > In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */
+/* > A and B implicitly gives the SVD of A*inv(B): */
+/* >                      A*inv(B) = U*(D1*inv(D2))*V**T. */
+/* > If ( A**T,B**T)**T  has orthonormal columns, then the GSVD of A and B is */
+/* > also equal to the CS decomposition of A and B. Furthermore, the GSVD */
+/* > can be used to derive the solution of the eigenvalue problem: */
+/* >                      A**T*A x = lambda* B**T*B x. */
+/* > In some literature, the GSVD of A and B is presented in the form */
+/* >                  U**T*A*X = ( 0 D1 ),   V**T*B*X = ( 0 D2 ) */
+/* > where U and V are orthogonal and X is nonsingular, D1 and D2 are */
+/* > ``diagonal''.  The former GSVD form can be converted to the latter */
+/* > form by taking the nonsingular matrix X as */
+/* > */
+/* >                      X = Q*( I   0    ) */
+/* >                            ( 0 inv(R) ). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          = 'U':  Orthogonal matrix U is computed; */
+/* >          = 'N':  U is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          = 'V':  Orthogonal matrix V is computed; */
+/* >          = 'N':  V is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBQ */
+/* > \verbatim */
+/* >          JOBQ is CHARACTER*1 */
+/* >          = 'Q':  Orthogonal matrix Q is computed; */
+/* >          = 'N':  Q is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrices A and B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] P */
+/* > \verbatim */
+/* >          P is INTEGER */
+/* >          The number of rows of the matrix B.  P >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[out] L */
+/* > \verbatim */
+/* >          L is INTEGER */
+/* > */
+/* >          On exit, K and L specify the dimension of the subblocks */
+/* >          described in Purpose. */
+/* >          K + L = effective numerical rank of (A**T,B**T)**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, A contains the triangular matrix R, or part of R. */
+/* >          See Purpose for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,N) */
+/* >          On entry, the P-by-N matrix B. */
+/* >          On exit, B contains the triangular matrix R if M-K-L < 0. */
+/* >          See Purpose for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHA */
+/* > \verbatim */
+/* >          ALPHA is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* > */
+/* >          On exit, ALPHA and BETA contain the generalized singular */
+/* >          value pairs of A and B; */
+/* >            ALPHA(1:K) = 1, */
+/* >            BETA(1:K)  = 0, */
+/* >          and if M-K-L >= 0, */
+/* >            ALPHA(K+1:K+L) = C, */
+/* >            BETA(K+1:K+L)  = S, */
+/* >          or if M-K-L < 0, */
+/* >            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */
+/* >            BETA(K+1:M) =S, BETA(M+1:K+L) =1 */
+/* >          and */
+/* >            ALPHA(K+L+1:N) = 0 */
+/* >            BETA(K+L+1:N)  = 0 */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is REAL array, dimension (LDU,M) */
+/* >          If JOBU = 'U', U contains the M-by-M orthogonal matrix U. */
+/* >          If JOBU = 'N', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U. LDU >= f2cmax(1,M) if */
+/* >          JOBU = 'U'; LDU >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,P) */
+/* >          If JOBV = 'V', V contains the P-by-P orthogonal matrix V. */
+/* >          If JOBV = 'N', V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. LDV >= f2cmax(1,P) if */
+/* >          JOBV = 'V'; LDV >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] Q */
+/* > \verbatim */
+/* >          Q is REAL array, dimension (LDQ,N) */
+/* >          If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. */
+/* >          If JOBQ = 'N', Q is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDQ */
+/* > \verbatim */
+/* >          LDQ is INTEGER */
+/* >          The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */
+/* >          JOBQ = 'Q'; LDQ >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* >          On exit, IWORK stores the sorting information. More */
+/* >          precisely, the following loop will sort ALPHA */
+/* >             for I = K+1, f2cmin(M,K+L) */
+/* >                 swap ALPHA(I) and ALPHA(IWORK(I)) */
+/* >             endfor */
+/* >          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if INFO = 1, the Jacobi-type procedure failed to */
+/* >                converge.  For further details, see subroutine STGSJA. */
+/* > \endverbatim */
+
+/* > \par Internal Parameters: */
+/*  ========================= */
+/* > */
+/* > \verbatim */
+/* >  TOLA    REAL */
+/* >  TOLB    REAL */
+/* >          TOLA and TOLB are the thresholds to determine the effective */
+/* >          rank of (A**T,B**T)**T. Generally, they are set to */
+/* >                   TOLA = MAX(M,N)*norm(A)*MACHEPS, */
+/* >                   TOLB = MAX(P,N)*norm(B)*MACHEPS. */
+/* >          The size of TOLA and TOLB may affect the size of backward */
+/* >          errors of the decomposition. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date August 2015 */
+
+/* > \ingroup realGEsing */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Ming Gu and Huan Ren, Computer Science Division, University of */
+/* >     California at Berkeley, USA */
+/* > */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* >  SGGSVD3 replaces the deprecated subroutine SGGSVD. */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggsvd3_(char *jobu, char *jobv, char *jobq, integer *m, 
+	integer *n, integer *p, integer *k, integer *l, real *a, integer *lda,
+	 real *b, integer *ldb, real *alpha, real *beta, real *u, integer *
+	ldu, real *v, integer *ldv, real *q, integer *ldq, real *work, 
+	integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, 
+	    u_offset, v_dim1, v_offset, i__1, i__2;
+
+    /* Local variables */
+    integer ibnd;
+    real tola;
+    integer isub;
+    real tolb, unfl, temp, smax;
+    integer ncallmycycle, i__, j;
+    extern logical lsame_(char *, char *);
+    real anorm, bnorm;
+    logical wantq;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    logical wantu, wantv;
+    extern real slamch_(char *), slange_(char *, integer *, integer *,
+	     real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), stgsja_(
+	    char *, char *, char *, integer *, integer *, integer *, integer *
+	    , integer *, real *, integer *, real *, integer *, real *, real *,
+	     real *, real *, real *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, integer *);
+    integer lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int sggsvp3_(char *, char *, char *, integer *, 
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , real *, integer *, integer *, real *, integer *, real *, 
+	    integer *, real *, integer *, integer *, real *, real *, integer *
+	    , integer *);
+    real ulp;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     August 2015 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode and test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --alpha;
+    --beta;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1 * 1;
+    q -= q_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    wantu = lsame_(jobu, "U");
+    wantv = lsame_(jobv, "V");
+    wantq = lsame_(jobq, "Q");
+    lquery = *lwork == -1;
+    lwkopt = 1;
+
+/*     Test the input arguments */
+
+    *info = 0;
+    if (! (wantu || lsame_(jobu, "N"))) {
+	*info = -1;
+    } else if (! (wantv || lsame_(jobv, "N"))) {
+	*info = -2;
+    } else if (! (wantq || lsame_(jobq, "N"))) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*p < 0) {
+	*info = -6;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -10;
+    } else if (*ldb < f2cmax(1,*p)) {
+	*info = -12;
+    } else if (*ldu < 1 || wantu && *ldu < *m) {
+	*info = -16;
+    } else if (*ldv < 1 || wantv && *ldv < *p) {
+	*info = -18;
+    } else if (*ldq < 1 || wantq && *ldq < *n) {
+	*info = -20;
+    } else if (*lwork < 1 && ! lquery) {
+	*info = -24;
+    }
+
+/*     Compute workspace */
+
+    if (*info == 0) {
+	sggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], 
+		ldb, &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv,
+		 &q[q_offset], ldq, &iwork[1], &work[1], &work[1], &c_n1, 
+		info);
+	lwkopt = *n + (integer) work[1];
+/* Computing MAX */
+	i__1 = *n << 1;
+	lwkopt = f2cmax(i__1,lwkopt);
+	lwkopt = f2cmax(1,lwkopt);
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGSVD3", &i__1, (ftnlen)7);
+	return 0;
+    }
+    if (lquery) {
+	return 0;
+    }
+
+/*     Compute the Frobenius norm of matrices A and B */
+
+    anorm = slange_("1", m, n, &a[a_offset], lda, &work[1]);
+    bnorm = slange_("1", p, n, &b[b_offset], ldb, &work[1]);
+
+/*     Get machine precision and set up threshold for determining */
+/*     the effective numerical rank of the matrices A and B. */
+
+    ulp = slamch_("Precision");
+    unfl = slamch_("Safe Minimum");
+    tola = f2cmax(*m,*n) * f2cmax(anorm,unfl) * ulp;
+    tolb = f2cmax(*p,*n) * f2cmax(bnorm,unfl) * ulp;
+
+/*     Preprocessing */
+
+    i__1 = *lwork - *n;
+    sggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, 
+	    &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[
+	    q_offset], ldq, &iwork[1], &work[1], &work[*n + 1], &i__1, info);
+
+/*     Compute the GSVD of two upper "triangular" matrices */
+
+    stgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], 
+	    ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[
+	    v_offset], ldv, &q[q_offset], ldq, &work[1], &ncallmycycle, info);
+
+/*     Sort the singular values and store the pivot indices in IWORK */
+/*     Copy ALPHA to WORK, then sort ALPHA in WORK */
+
+    scopy_(n, &alpha[1], &c__1, &work[1], &c__1);
+/* Computing MIN */
+    i__1 = *l, i__2 = *m - *k;
+    ibnd = f2cmin(i__1,i__2);
+    i__1 = ibnd;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Scan for largest ALPHA(K+I) */
+
+	isub = i__;
+	smax = work[*k + i__];
+	i__2 = ibnd;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    temp = work[*k + j];
+	    if (temp > smax) {
+		isub = j;
+		smax = temp;
+	    }
+/* L10: */
+	}
+	if (isub != i__) {
+	    work[*k + isub] = work[*k + i__];
+	    work[*k + i__] = smax;
+	    iwork[*k + i__] = *k + isub;
+	} else {
+	    iwork[*k + i__] = *k + i__;
+	}
+/* L20: */
+    }
+
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SGGSVD3 */
+
+} /* sggsvd3_ */
+
diff --git a/lapack-netlib/SRC/sggsvp3.c b/lapack-netlib/SRC/sggsvp3.c
new file mode 100644
index 000000000..99899fa8a
--- /dev/null
+++ b/lapack-netlib/SRC/sggsvp3.c
@@ -0,0 +1,1056 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;
+static real c_b14 = 0.f;
+static real c_b24 = 1.f;
+
+/* > \brief \b SGGSVP3 */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGGSVP3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggsvp3
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggsvp3
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggsvp3
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, */
+/*                           TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, */
+/*                           IWORK, TAU, WORK, LWORK, INFO ) */
+
+/*       CHARACTER          JOBQ, JOBU, JOBV */
+/*       INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */
+/*       REAL               TOLA, TOLB */
+/*       INTEGER            IWORK( * ) */
+/*       REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */
+/*      $                   TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGGSVP3 computes orthogonal matrices U, V and Q such that */
+/* > */
+/* >                    N-K-L  K    L */
+/* >  U**T*A*Q =     K ( 0    A12  A13 )  if M-K-L >= 0; */
+/* >                 L ( 0     0   A23 ) */
+/* >             M-K-L ( 0     0    0  ) */
+/* > */
+/* >                  N-K-L  K    L */
+/* >         =     K ( 0    A12  A13 )  if M-K-L < 0; */
+/* >             M-K ( 0     0   A23 ) */
+/* > */
+/* >                  N-K-L  K    L */
+/* >  V**T*B*Q =   L ( 0     0   B13 ) */
+/* >             P-L ( 0     0    0  ) */
+/* > */
+/* > where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */
+/* > upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */
+/* > otherwise A23 is (M-K)-by-L upper trapezoidal.  K+L = the effective */
+/* > numerical rank of the (M+P)-by-N matrix (A**T,B**T)**T. */
+/* > */
+/* > This decomposition is the preprocessing step for computing the */
+/* > Generalized Singular Value Decomposition (GSVD), see subroutine */
+/* > SGGSVD3. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          = 'U':  Orthogonal matrix U is computed; */
+/* >          = 'N':  U is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          = 'V':  Orthogonal matrix V is computed; */
+/* >          = 'N':  V is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBQ */
+/* > \verbatim */
+/* >          JOBQ is CHARACTER*1 */
+/* >          = 'Q':  Orthogonal matrix Q is computed; */
+/* >          = 'N':  Q is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] P */
+/* > \verbatim */
+/* >          P is INTEGER */
+/* >          The number of rows of the matrix B.  P >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrices A and B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, A contains the triangular (or trapezoidal) matrix */
+/* >          described in the Purpose section. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,N) */
+/* >          On entry, the P-by-N matrix B. */
+/* >          On exit, B contains the triangular matrix described in */
+/* >          the Purpose section. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= f2cmax(1,P). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TOLA */
+/* > \verbatim */
+/* >          TOLA is REAL */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TOLB */
+/* > \verbatim */
+/* >          TOLB is REAL */
+/* > */
+/* >          TOLA and TOLB are the thresholds to determine the effective */
+/* >          numerical rank of matrix B and a subblock of A. Generally, */
+/* >          they are set to */
+/* >             TOLA = MAX(M,N)*norm(A)*MACHEPS, */
+/* >             TOLB = MAX(P,N)*norm(B)*MACHEPS. */
+/* >          The size of TOLA and TOLB may affect the size of backward */
+/* >          errors of the decomposition. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[out] L */
+/* > \verbatim */
+/* >          L is INTEGER */
+/* > */
+/* >          On exit, K and L specify the dimension of the subblocks */
+/* >          described in Purpose section. */
+/* >          K + L = effective numerical rank of (A**T,B**T)**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is REAL array, dimension (LDU,M) */
+/* >          If JOBU = 'U', U contains the orthogonal matrix U. */
+/* >          If JOBU = 'N', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U. LDU >= f2cmax(1,M) if */
+/* >          JOBU = 'U'; LDU >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,P) */
+/* >          If JOBV = 'V', V contains the orthogonal matrix V. */
+/* >          If JOBV = 'N', V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. LDV >= f2cmax(1,P) if */
+/* >          JOBV = 'V'; LDV >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] Q */
+/* > \verbatim */
+/* >          Q is REAL array, dimension (LDQ,N) */
+/* >          If JOBQ = 'Q', Q contains the orthogonal matrix Q. */
+/* >          If JOBQ = 'N', Q is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDQ */
+/* > \verbatim */
+/* >          LDQ is INTEGER */
+/* >          The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */
+/* >          JOBQ = 'Q'; LDQ >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date August 2015 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The subroutine uses LAPACK subroutine SGEQP3 for the QR factorization */
+/* >  with column pivoting to detect the effective numerical rank of the */
+/* >  a matrix. It may be replaced by a better rank determination strategy. */
+/* > */
+/* >  SGGSVP3 replaces the deprecated subroutine SGGSVP. */
+/* > */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int sggsvp3_(char *jobu, char *jobv, char *jobq, integer *m, 
+	integer *p, integer *n, real *a, integer *lda, real *b, integer *ldb, 
+	real *tola, real *tolb, integer *k, integer *l, real *u, integer *ldu,
+	 real *v, integer *ldv, real *q, integer *ldq, integer *iwork, real *
+	tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, 
+	    u_offset, v_dim1, v_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Local variables */
+    integer i__, j;
+    extern logical lsame_(char *, char *);
+    logical wantq, wantu, wantv;
+    extern /* Subroutine */ int sgeqp3_(integer *, integer *, real *, integer 
+	    *, integer *, real *, real *, integer *, integer *), sgeqr2_(
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    ), sgerq2_(integer *, integer *, real *, integer *, real *, real *
+	    , integer *), sorg2r_(integer *, integer *, integer *, real *, 
+	    integer *, real *, real *, integer *), sorm2r_(char *, char *, 
+	    integer *, integer *, integer *, real *, integer *, real *, real *
+	    , integer *, real *, integer *), sormr2_(char *, 
+	    char *, integer *, integer *, integer *, real *, integer *, real *
+	    , real *, integer *, real *, integer *), xerbla_(
+	    char *, integer *, ftnlen), slacpy_(char *, integer *, integer *, 
+	    real *, integer *, real *, integer *), slaset_(char *, 
+	    integer *, integer *, real *, real *, real *, integer *), 
+	    slapmt_(logical *, integer *, integer *, real *, integer *, 
+	    integer *);
+    logical forwrd;
+    integer lwkopt;
+    logical lquery;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     August 2015 */
+
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1 * 1;
+    q -= q_offset;
+    --iwork;
+    --tau;
+    --work;
+
+    /* Function Body */
+    wantu = lsame_(jobu, "U");
+    wantv = lsame_(jobv, "V");
+    wantq = lsame_(jobq, "Q");
+    forwrd = TRUE_;
+    lquery = *lwork == -1;
+    lwkopt = 1;
+
+/*     Test the input arguments */
+
+    *info = 0;
+    if (! (wantu || lsame_(jobu, "N"))) {
+	*info = -1;
+    } else if (! (wantv || lsame_(jobv, "N"))) {
+	*info = -2;
+    } else if (! (wantq || lsame_(jobq, "N"))) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*p < 0) {
+	*info = -5;
+    } else if (*n < 0) {
+	*info = -6;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -8;
+    } else if (*ldb < f2cmax(1,*p)) {
+	*info = -10;
+    } else if (*ldu < 1 || wantu && *ldu < *m) {
+	*info = -16;
+    } else if (*ldv < 1 || wantv && *ldv < *p) {
+	*info = -18;
+    } else if (*ldq < 1 || wantq && *ldq < *n) {
+	*info = -20;
+    } else if (*lwork < 1 && ! lquery) {
+	*info = -24;
+    }
+
+/*     Compute workspace */
+
+    if (*info == 0) {
+	sgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], &c_n1, 
+		info);
+	lwkopt = (integer) work[1];
+	if (wantv) {
+	    lwkopt = f2cmax(lwkopt,*p);
+	}
+/* Computing MAX */
+	i__1 = lwkopt, i__2 = f2cmin(*n,*p);
+	lwkopt = f2cmax(i__1,i__2);
+	lwkopt = f2cmax(lwkopt,*m);
+	if (wantq) {
+	    lwkopt = f2cmax(lwkopt,*n);
+	}
+	sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], &c_n1, 
+		info);
+/* Computing MAX */
+	i__1 = lwkopt, i__2 = (integer) work[1];
+	lwkopt = f2cmax(i__1,i__2);
+	lwkopt = f2cmax(1,lwkopt);
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGGSVP3", &i__1, (ftnlen)7);
+	return 0;
+    }
+    if (lquery) {
+	return 0;
+    }
+
+/*     QR with column pivoting of B: B*P = V*( S11 S12 ) */
+/*                                           (  0   0  ) */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	iwork[i__] = 0;
+/* L10: */
+    }
+    sgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], lwork, 
+	    info);
+
+/*     Update A := A*P */
+
+    slapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]);
+
+/*     Determine the effective rank of matrix B. */
+
+    *l = 0;
+    i__1 = f2cmin(*p,*n);
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = b[i__ + i__ * b_dim1], abs(r__1)) > *tolb) {
+	    ++(*l);
+	}
+/* L20: */
+    }
+
+    if (wantv) {
+
+/*        Copy the details of V, and form V. */
+
+	slaset_("Full", p, p, &c_b14, &c_b14, &v[v_offset], ldv);
+	if (*p > 1) {
+	    i__1 = *p - 1;
+	    slacpy_("Lower", &i__1, n, &b[b_dim1 + 2], ldb, &v[v_dim1 + 2], 
+		    ldv);
+	}
+	i__1 = f2cmin(*p,*n);
+	sorg2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info);
+    }
+
+/*     Clean up B */
+
+    i__1 = *l - 1;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *l;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    b[i__ + j * b_dim1] = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    if (*p > *l) {
+	i__1 = *p - *l;
+	slaset_("Full", &i__1, n, &c_b14, &c_b14, &b[*l + 1 + b_dim1], ldb);
+    }
+
+    if (wantq) {
+
+/*        Set Q = I and Update Q := Q*P */
+
+	slaset_("Full", n, n, &c_b14, &c_b24, &q[q_offset], ldq);
+	slapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]);
+    }
+
+    if (*p >= *l && *n != *l) {
+
+/*        RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z */
+
+	sgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info);
+
+/*        Update A := A*Z**T */
+
+	sormr2_("Right", "Transpose", m, n, l, &b[b_offset], ldb, &tau[1], &a[
+		a_offset], lda, &work[1], info);
+
+	if (wantq) {
+
+/*           Update Q := Q*Z**T */
+
+	    sormr2_("Right", "Transpose", n, n, l, &b[b_offset], ldb, &tau[1],
+		     &q[q_offset], ldq, &work[1], info);
+	}
+
+/*        Clean up B */
+
+	i__1 = *n - *l;
+	slaset_("Full", l, &i__1, &c_b14, &c_b14, &b[b_offset], ldb);
+	i__1 = *n;
+	for (j = *n - *l + 1; j <= i__1; ++j) {
+	    i__2 = *l;
+	    for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = 0.f;
+/* L50: */
+	    }
+/* L60: */
+	}
+
+    }
+
+/*     Let              N-L     L */
+/*                A = ( A11    A12 ) M, */
+
+/*     then the following does the complete QR decomposition of A11: */
+
+/*              A11 = U*(  0  T12 )*P1**T */
+/*                      (  0   0  ) */
+
+    i__1 = *n - *l;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	iwork[i__] = 0;
+/* L70: */
+    }
+    i__1 = *n - *l;
+    sgeqp3_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], lwork, 
+	    info);
+
+/*     Determine the effective rank of A11 */
+
+    *k = 0;
+/* Computing MIN */
+    i__2 = *m, i__3 = *n - *l;
+    i__1 = f2cmin(i__2,i__3);
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = a[i__ + i__ * a_dim1], abs(r__1)) > *tola) {
+	    ++(*k);
+	}
+/* L80: */
+    }
+
+/*     Update A12 := U**T*A12, where A12 = A( 1:M, N-L+1:N ) */
+
+/* Computing MIN */
+    i__2 = *m, i__3 = *n - *l;
+    i__1 = f2cmin(i__2,i__3);
+    sorm2r_("Left", "Transpose", m, l, &i__1, &a[a_offset], lda, &tau[1], &a[(
+	    *n - *l + 1) * a_dim1 + 1], lda, &work[1], info);
+
+    if (wantu) {
+
+/*        Copy the details of U, and form U */
+
+	slaset_("Full", m, m, &c_b14, &c_b14, &u[u_offset], ldu);
+	if (*m > 1) {
+	    i__1 = *m - 1;
+	    i__2 = *n - *l;
+	    slacpy_("Lower", &i__1, &i__2, &a[a_dim1 + 2], lda, &u[u_dim1 + 2]
+		    , ldu);
+	}
+/* Computing MIN */
+	i__2 = *m, i__3 = *n - *l;
+	i__1 = f2cmin(i__2,i__3);
+	sorg2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info);
+    }
+
+    if (wantq) {
+
+/*        Update Q( 1:N, 1:N-L )  = Q( 1:N, 1:N-L )*P1 */
+
+	i__1 = *n - *l;
+	slapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]);
+    }
+
+/*     Clean up A: set the strictly lower triangular part of */
+/*     A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */
+
+    i__1 = *k - 1;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.f;
+/* L90: */
+	}
+/* L100: */
+    }
+    if (*m > *k) {
+	i__1 = *m - *k;
+	i__2 = *n - *l;
+	slaset_("Full", &i__1, &i__2, &c_b14, &c_b14, &a[*k + 1 + a_dim1], 
+		lda);
+    }
+
+    if (*n - *l > *k) {
+
+/*        RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */
+
+	i__1 = *n - *l;
+	sgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info);
+
+	if (wantq) {
+
+/*           Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**T */
+
+	    i__1 = *n - *l;
+	    sormr2_("Right", "Transpose", n, &i__1, k, &a[a_offset], lda, &
+		    tau[1], &q[q_offset], ldq, &work[1], info);
+	}
+
+/*        Clean up A */
+
+	i__1 = *n - *l - *k;
+	slaset_("Full", k, &i__1, &c_b14, &c_b14, &a[a_offset], lda);
+	i__1 = *n - *l;
+	for (j = *n - *l - *k + 1; j <= i__1; ++j) {
+	    i__2 = *k;
+	    for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = 0.f;
+/* L110: */
+	    }
+/* L120: */
+	}
+
+    }
+
+    if (*m > *k) {
+
+/*        QR factorization of A( K+1:M,N-L+1:N ) */
+
+	i__1 = *m - *k;
+	sgeqr2_(&i__1, l, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], &
+		work[1], info);
+
+	if (wantu) {
+
+/*           Update U(:,K+1:M) := U(:,K+1:M)*U1 */
+
+	    i__1 = *m - *k;
+/* Computing MIN */
+	    i__3 = *m - *k;
+	    i__2 = f2cmin(i__3,*l);
+	    sorm2r_("Right", "No transpose", m, &i__1, &i__2, &a[*k + 1 + (*n 
+		    - *l + 1) * a_dim1], lda, &tau[1], &u[(*k + 1) * u_dim1 + 
+		    1], ldu, &work[1], info);
+	}
+
+/*        Clean up */
+
+	i__1 = *n;
+	for (j = *n - *l + 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = 0.f;
+/* L130: */
+	    }
+/* L140: */
+	}
+
+    }
+
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SGGSVP3 */
+
+} /* sggsvp3_ */
+
diff --git a/lapack-netlib/SRC/sgsvj0.c b/lapack-netlib/SRC/sgsvj0.c
new file mode 100644
index 000000000..0a3bf3872
--- /dev/null
+++ b/lapack-netlib/SRC/sgsvj0.c
@@ -0,0 +1,1592 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static real c_b42 = 1.f;
+
+/* > \brief \b SGSVJ0 pre-processor for the routine sgesvj. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGSVJ0 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgsvj0.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgsvj0.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgsvj0.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, */
+/*                          SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP */
+/*       REAL               EPS, SFMIN, TOL */
+/*       CHARACTER*1        JOBV */
+/*       REAL               A( LDA, * ), SVA( N ), D( N ), V( LDV, * ), */
+/*      $                   WORK( LWORK ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGSVJ0 is called from SGESVJ as a pre-processor and that is its main */
+/* > purpose. It applies Jacobi rotations in the same way as SGESVJ does, but */
+/* > it does not check convergence (stopping criterion). Few tuning */
+/* > parameters (marked by [TP]) are available for the implementer. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          Specifies whether the output from this procedure is used */
+/* >          to compute the matrix V: */
+/* >          = 'V': the product of the Jacobi rotations is accumulated */
+/* >                 by postmulyiplying the N-by-N array V. */
+/* >                (See the description of V.) */
+/* >          = 'A': the product of the Jacobi rotations is accumulated */
+/* >                 by postmulyiplying the MV-by-N array V. */
+/* >                (See the descriptions of MV and V.) */
+/* >          = 'N': the Jacobi rotations are not accumulated. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A. */
+/* >          M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, M-by-N matrix A, such that A*diag(D) represents */
+/* >          the input matrix. */
+/* >          On exit, */
+/* >          A_onexit * D_onexit represents the input matrix A*diag(D) */
+/* >          post-multiplied by a sequence of Jacobi rotations, where the */
+/* >          rotation threshold and the total number of sweeps are given in */
+/* >          TOL and NSWEEP, respectively. */
+/* >          (See the descriptions of D, TOL and NSWEEP.) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          The array D accumulates the scaling factors from the fast scaled */
+/* >          Jacobi rotations. */
+/* >          On entry, A*diag(D) represents the input matrix. */
+/* >          On exit, A_onexit*diag(D_onexit) represents the input matrix */
+/* >          post-multiplied by a sequence of Jacobi rotations, where the */
+/* >          rotation threshold and the total number of sweeps are given in */
+/* >          TOL and NSWEEP, respectively. */
+/* >          (See the descriptions of A, TOL and NSWEEP.) */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] SVA */
+/* > \verbatim */
+/* >          SVA is REAL array, dimension (N) */
+/* >          On entry, SVA contains the Euclidean norms of the columns of */
+/* >          the matrix A*diag(D). */
+/* >          On exit, SVA contains the Euclidean norms of the columns of */
+/* >          the matrix onexit*diag(D_onexit). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MV */
+/* > \verbatim */
+/* >          MV is INTEGER */
+/* >          If JOBV = 'A', then MV rows of V are post-multipled by a */
+/* >                           sequence of Jacobi rotations. */
+/* >          If JOBV = 'N',   then MV is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,N) */
+/* >          If JOBV = 'V' then N rows of V are post-multipled by a */
+/* >                           sequence of Jacobi rotations. */
+/* >          If JOBV = 'A' then MV rows of V are post-multipled by a */
+/* >                           sequence of Jacobi rotations. */
+/* >          If JOBV = 'N',   then V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V,  LDV >= 1. */
+/* >          If JOBV = 'V', LDV >= N. */
+/* >          If JOBV = 'A', LDV >= MV. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] EPS */
+/* > \verbatim */
+/* >          EPS is REAL */
+/* >          EPS = SLAMCH('Epsilon') */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SFMIN */
+/* > \verbatim */
+/* >          SFMIN is REAL */
+/* >          SFMIN = SLAMCH('Safe Minimum') */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TOL */
+/* > \verbatim */
+/* >          TOL is REAL */
+/* >          TOL is the threshold for Jacobi rotations. For a pair */
+/* >          A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */
+/* >          applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NSWEEP */
+/* > \verbatim */
+/* >          NSWEEP is INTEGER */
+/* >          NSWEEP is the number of sweeps of Jacobi rotations to be */
+/* >          performed. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (LWORK) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          LWORK is the dimension of WORK. LWORK >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, then the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > SGSVJ0 is used just to enable SGESVJ to call a simplified version of */
+/* > itself to work on a submatrix of the original matrix. */
+/* > */
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */
+/* > */
+/* > \par Bugs, Examples and Comments: */
+/*  ================================= */
+/* > */
+/* > Please report all bugs and send interesting test examples and comments to */
+/* > drmac@math.hr. Thank you. */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgsvj0_(char *jobv, integer *m, integer *n, real *a, 
+	integer *lda, real *d__, real *sva, integer *mv, real *v, integer *
+	ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, 
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, 
+	    i__6;
+    real r__1, r__2;
+
+    /* Local variables */
+    real aapp, aapq, aaqq;
+    integer ierr;
+    real bigtheta;
+    extern real sdot_(integer *, real *, integer *, real *, integer *);
+    integer pskipped;
+    real aapp0, temp1;
+    extern real snrm2_(integer *, real *, integer *);
+    integer i__, p, q;
+    real t, apoaq, aqoap;
+    extern logical lsame_(char *, char *);
+    real theta, small, fastr[5];
+    logical applv, rsvec;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    logical rotok;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
+	    integer *), srotm_(integer *, real *, integer *, real *, integer *
+	    , real *);
+    real rootsfmin, cs, sn;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    integer ijblsk, swband;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    integer blskip;
+    real mxaapq, thsign;
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 
+	    real *);
+    real mxsinj;
+    integer ir1, emptsw, notrot, iswrot, jbc;
+    real big;
+    integer kbl, lkahead, igl, ibr, jgl, nbl, mvl;
+    real rootbig, rooteps;
+    integer rowskip;
+    real roottol;
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    --sva;
+    --d__;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --work;
+
+    /* Function Body */
+    applv = lsame_(jobv, "A");
+    rsvec = lsame_(jobv, "V");
+    if (! (rsvec || applv || lsame_(jobv, "N"))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0 || *n > *m) {
+	*info = -3;
+    } else if (*lda < *m) {
+	*info = -5;
+    } else if ((rsvec || applv) && *mv < 0) {
+	*info = -8;
+    } else if (rsvec && *ldv < *n || applv && *ldv < *mv) {
+	*info = -10;
+    } else if (*tol <= *eps) {
+	*info = -13;
+    } else if (*nsweep < 0) {
+	*info = -14;
+    } else if (*lwork < *m) {
+	*info = -16;
+    } else {
+	*info = 0;
+    }
+
+/*     #:( */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGSVJ0", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    if (rsvec) {
+	mvl = *n;
+    } else if (applv) {
+	mvl = *mv;
+    }
+    rsvec = rsvec || applv;
+    rooteps = sqrt(*eps);
+    rootsfmin = sqrt(*sfmin);
+    small = *sfmin / *eps;
+    big = 1.f / *sfmin;
+    rootbig = 1.f / rootsfmin;
+    bigtheta = 1.f / rooteps;
+    roottol = sqrt(*tol);
+
+
+    emptsw = *n * (*n - 1) / 2;
+    notrot = 0;
+    fastr[0] = 0.f;
+
+
+    swband = 0;
+/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */
+/*     if SGESVJ is used as a computational routine in the preconditioned */
+/*     Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure */
+/*     ...... */
+    kbl = f2cmin(8,*n);
+/* [TP] KBL is a tuning parameter that defines the tile size in the */
+/*     tiling of the p-q loops of pivot pairs. In general, an optimal */
+/*     value of KBL depends on the matrix dimensions and on the */
+/*     parameters of the computer's memory. */
+
+    nbl = *n / kbl;
+    if (nbl * kbl != *n) {
+	++nbl;
+    }
+/* Computing 2nd power */
+    i__1 = kbl;
+    blskip = i__1 * i__1 + 1;
+/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */
+    rowskip = f2cmin(5,kbl);
+/* [TP] ROWSKIP is a tuning parameter. */
+    lkahead = 1;
+/* [TP] LKAHEAD is a tuning parameter. */
+    swband = 0;
+    pskipped = 0;
+
+    i__1 = *nsweep;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+	mxaapq = 0.f;
+	mxsinj = 0.f;
+	iswrot = 0;
+
+	notrot = 0;
+	pskipped = 0;
+
+	i__2 = nbl;
+	for (ibr = 1; ibr <= i__2; ++ibr) {
+	    igl = (ibr - 1) * kbl + 1;
+
+/* Computing MIN */
+	    i__4 = lkahead, i__5 = nbl - ibr;
+	    i__3 = f2cmin(i__4,i__5);
+	    for (ir1 = 0; ir1 <= i__3; ++ir1) {
+
+		igl += ir1 * kbl;
+
+/* Computing MIN */
+		i__5 = igl + kbl - 1, i__6 = *n - 1;
+		i__4 = f2cmin(i__5,i__6);
+		for (p = igl; p <= i__4; ++p) {
+		    i__5 = *n - p + 1;
+		    q = isamax_(&i__5, &sva[p], &c__1) + p - 1;
+		    if (p != q) {
+			sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 
+				1], &c__1);
+			if (rsvec) {
+			    sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				    v_dim1 + 1], &c__1);
+			}
+			temp1 = sva[p];
+			sva[p] = sva[q];
+			sva[q] = temp1;
+			temp1 = d__[p];
+			d__[p] = d__[q];
+			d__[q] = temp1;
+		    }
+
+		    if (ir1 == 0) {
+
+/*        Column norms are periodically updated by explicit */
+/*        norm computation. */
+/*        Caveat: */
+/*        Some BLAS implementations compute SNRM2(M,A(1,p),1) */
+/*        as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may result in */
+/*        overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and */
+/*        undeflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */
+/*        Hence, SNRM2 cannot be trusted, not even in the case when */
+/*        the true norm is far from the under(over)flow boundaries. */
+/*        If properly implemented SNRM2 is available, the IF-THEN-ELSE */
+/*        below should read "AAPP = SNRM2( M, A(1,p), 1 ) * D(p)". */
+
+			if (sva[p] < rootbig && sva[p] > rootsfmin) {
+			    sva[p] = snrm2_(m, &a[p * a_dim1 + 1], &c__1) * 
+				    d__[p];
+			} else {
+			    temp1 = 0.f;
+			    aapp = 1.f;
+			    slassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, &
+				    aapp);
+			    sva[p] = temp1 * sqrt(aapp) * d__[p];
+			}
+			aapp = sva[p];
+		    } else {
+			aapp = sva[p];
+		    }
+
+		    if (aapp > 0.f) {
+
+			pskipped = 0;
+
+/* Computing MIN */
+			i__6 = igl + kbl - 1;
+			i__5 = f2cmin(i__6,*n);
+			for (q = p + 1; q <= i__5; ++q) {
+
+			    aaqq = sva[q];
+			    if (aaqq > 0.f) {
+
+				aapp0 = aapp;
+				if (aaqq >= 1.f) {
+				    rotok = small * aapp <= aaqq;
+				    if (aapp < big / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * d__[p] * d__[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[1], &c__1);
+					slascl_("G", &c__0, &c__0, &aapp, &
+						d__[p], m, &c__1, &work[1], 
+						lda, &ierr);
+					aapq = sdot_(m, &work[1], &c__1, &a[q 
+						* a_dim1 + 1], &c__1) * d__[q]
+						 / aaqq;
+				    }
+				} else {
+				    rotok = aapp <= aaqq / small;
+				    if (aapp > small / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * d__[p] * d__[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[q * a_dim1 + 1], &c__1, &
+						work[1], &c__1);
+					slascl_("G", &c__0, &c__0, &aaqq, &
+						d__[q], m, &c__1, &work[1], 
+						lda, &ierr);
+					aapq = sdot_(m, &work[1], &c__1, &a[p 
+						* a_dim1 + 1], &c__1) * d__[p]
+						 / aapp;
+				    }
+				}
+
+/* Computing MAX */
+				r__1 = mxaapq, r__2 = abs(aapq);
+				mxaapq = f2cmax(r__1,r__2);
+
+/*        TO rotate or NOT to rotate, THAT is the question ... */
+
+				if (abs(aapq) > *tol) {
+
+/*           ROTATED = ROTATED + ONE */
+
+				    if (ir1 == 0) {
+					notrot = 0;
+					pskipped = 0;
+					++iswrot;
+				    }
+
+				    if (rotok) {
+
+					aqoap = aaqq / aapp;
+					apoaq = aapp / aaqq;
+					theta = (r__1 = aqoap - apoaq, abs(
+						r__1)) * -.5f / aapq;
+
+					if (abs(theta) > bigtheta) {
+
+					    t = .5f / theta;
+					    fastr[2] = t * d__[p] / d__[q];
+					    fastr[3] = -t * d__[q] / d__[p];
+					    srotm_(m, &a[p * a_dim1 + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1, fastr);
+					    if (rsvec) {
+			  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				  v_dim1 + 1], &c__1, fastr);
+					    }
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(t);
+					    mxsinj = f2cmax(r__1,r__2);
+
+					} else {
+
+
+					    thsign = -r_sign(&c_b42, &aapq);
+					    t = 1.f / (theta + thsign * sqrt(
+						    theta * theta + 1.f));
+					    cs = sqrt(1.f / (t * t + 1.f));
+					    sn = t * cs;
+
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(sn);
+					    mxsinj = f2cmax(r__1,r__2);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+
+					    apoaq = d__[p] / d__[q];
+					    aqoap = d__[q] / d__[p];
+					    if (d__[p] >= 1.f) {
+			  if (d__[q] >= 1.f) {
+			      fastr[2] = t * apoaq;
+			      fastr[3] = -t * aqoap;
+			      d__[p] *= cs;
+			      d__[q] *= cs;
+			      srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
+				      a_dim1 + 1], &c__1, fastr);
+			      if (rsvec) {
+				  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
+					  q * v_dim1 + 1], &c__1, fastr);
+			      }
+			  } else {
+			      r__1 = -t * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      r__1 = cs * sn * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      d__[p] *= cs;
+			      d__[q] /= cs;
+			      if (rsvec) {
+				  r__1 = -t * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+			      }
+			  }
+					    } else {
+			  if (d__[q] >= 1.f) {
+			      r__1 = t * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      r__1 = -cs * sn * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      d__[p] /= cs;
+			      d__[q] *= cs;
+			      if (rsvec) {
+				  r__1 = t * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+			      }
+			  } else {
+			      if (d__[p] >= d__[q]) {
+				  r__1 = -t * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  d__[p] *= cs;
+				  d__[q] /= cs;
+				  if (rsvec) {
+				      r__1 = -t * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				      r__1 = cs * sn * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				  }
+			      } else {
+				  r__1 = t * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  d__[p] /= cs;
+				  d__[q] *= cs;
+				  if (rsvec) {
+				      r__1 = t * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				      r__1 = -cs * sn * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				  }
+			      }
+			  }
+					    }
+					}
+
+				    } else {
+					scopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[1], &c__1);
+					slascl_("G", &c__0, &c__0, &aapp, &
+						c_b42, m, &c__1, &work[1], 
+						lda, &ierr);
+					slascl_("G", &c__0, &c__0, &aaqq, &
+						c_b42, m, &c__1, &a[q * 
+						a_dim1 + 1], lda, &ierr);
+					temp1 = -aapq * d__[p] / d__[q];
+					saxpy_(m, &temp1, &work[1], &c__1, &a[
+						q * a_dim1 + 1], &c__1);
+					slascl_("G", &c__0, &c__0, &c_b42, &
+						aaqq, m, &c__1, &a[q * a_dim1 
+						+ 1], lda, &ierr);
+/* Computing MAX */
+					r__1 = 0.f, r__2 = 1.f - aapq * aapq;
+					sva[q] = aaqq * sqrt((f2cmax(r__1,r__2)))
+						;
+					mxsinj = f2cmax(mxsinj,*sfmin);
+				    }
+/*           END IF ROTOK THEN ... ELSE */
+
+/*           In the case of cancellation in updating SVA(q), SVA(p) */
+/*           recompute SVA(q), SVA(p). */
+/* Computing 2nd power */
+				    r__1 = sva[q] / aaqq;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aaqq < rootbig && aaqq > 
+						rootsfmin) {
+					    sva[q] = snrm2_(m, &a[q * a_dim1 
+						    + 1], &c__1) * d__[q];
+					} else {
+					    t = 0.f;
+					    aaqq = 1.f;
+					    slassq_(m, &a[q * a_dim1 + 1], &
+						    c__1, &t, &aaqq);
+					    sva[q] = t * sqrt(aaqq) * d__[q];
+					}
+				    }
+				    if (aapp / aapp0 <= rooteps) {
+					if (aapp < rootbig && aapp > 
+						rootsfmin) {
+					    aapp = snrm2_(m, &a[p * a_dim1 + 
+						    1], &c__1) * d__[p];
+					} else {
+					    t = 0.f;
+					    aapp = 1.f;
+					    slassq_(m, &a[p * a_dim1 + 1], &
+						    c__1, &t, &aapp);
+					    aapp = t * sqrt(aapp) * d__[p];
+					}
+					sva[p] = aapp;
+				    }
+
+				} else {
+/*        A(:,p) and A(:,q) already numerically orthogonal */
+				    if (ir1 == 0) {
+					++notrot;
+				    }
+				    ++pskipped;
+				}
+			    } else {
+/*        A(:,q) is zero column */
+				if (ir1 == 0) {
+				    ++notrot;
+				}
+				++pskipped;
+			    }
+
+			    if (i__ <= swband && pskipped > rowskip) {
+				if (ir1 == 0) {
+				    aapp = -aapp;
+				}
+				notrot = 0;
+				goto L2103;
+			    }
+
+/* L2002: */
+			}
+/*     END q-LOOP */
+
+L2103:
+/*     bailed out of q-loop */
+			sva[p] = aapp;
+		    } else {
+			sva[p] = aapp;
+			if (ir1 == 0 && aapp == 0.f) {
+/* Computing MIN */
+			    i__5 = igl + kbl - 1;
+			    notrot = notrot + f2cmin(i__5,*n) - p;
+			}
+		    }
+
+/* L2001: */
+		}
+/*     end of the p-loop */
+/*     end of doing the block ( ibr, ibr ) */
+/* L1002: */
+	    }
+/*     end of ir1-loop */
+
+/* ........................................................ */
+/* ... go to the off diagonal blocks */
+
+	    igl = (ibr - 1) * kbl + 1;
+
+	    i__3 = nbl;
+	    for (jbc = ibr + 1; jbc <= i__3; ++jbc) {
+
+		jgl = (jbc - 1) * kbl + 1;
+
+/*        doing the block at ( ibr, jbc ) */
+
+		ijblsk = 0;
+/* Computing MIN */
+		i__5 = igl + kbl - 1;
+		i__4 = f2cmin(i__5,*n);
+		for (p = igl; p <= i__4; ++p) {
+
+		    aapp = sva[p];
+
+		    if (aapp > 0.f) {
+
+			pskipped = 0;
+
+/* Computing MIN */
+			i__6 = jgl + kbl - 1;
+			i__5 = f2cmin(i__6,*n);
+			for (q = jgl; q <= i__5; ++q) {
+
+			    aaqq = sva[q];
+
+			    if (aaqq > 0.f) {
+				aapp0 = aapp;
+
+
+
+				if (aaqq >= 1.f) {
+				    if (aapp >= aaqq) {
+					rotok = small * aapp <= aaqq;
+				    } else {
+					rotok = small * aaqq <= aapp;
+				    }
+				    if (aapp < big / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * d__[p] * d__[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[1], &c__1);
+					slascl_("G", &c__0, &c__0, &aapp, &
+						d__[p], m, &c__1, &work[1], 
+						lda, &ierr);
+					aapq = sdot_(m, &work[1], &c__1, &a[q 
+						* a_dim1 + 1], &c__1) * d__[q]
+						 / aaqq;
+				    }
+				} else {
+				    if (aapp >= aaqq) {
+					rotok = aapp <= aaqq / small;
+				    } else {
+					rotok = aaqq <= aapp / small;
+				    }
+				    if (aapp > small / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * d__[p] * d__[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[q * a_dim1 + 1], &c__1, &
+						work[1], &c__1);
+					slascl_("G", &c__0, &c__0, &aaqq, &
+						d__[q], m, &c__1, &work[1], 
+						lda, &ierr);
+					aapq = sdot_(m, &work[1], &c__1, &a[p 
+						* a_dim1 + 1], &c__1) * d__[p]
+						 / aapp;
+				    }
+				}
+
+/* Computing MAX */
+				r__1 = mxaapq, r__2 = abs(aapq);
+				mxaapq = f2cmax(r__1,r__2);
+
+/*        TO rotate or NOT to rotate, THAT is the question ... */
+
+				if (abs(aapq) > *tol) {
+				    notrot = 0;
+/*           ROTATED  = ROTATED + 1 */
+				    pskipped = 0;
+				    ++iswrot;
+
+				    if (rotok) {
+
+					aqoap = aaqq / aapp;
+					apoaq = aapp / aaqq;
+					theta = (r__1 = aqoap - apoaq, abs(
+						r__1)) * -.5f / aapq;
+					if (aaqq > aapp0) {
+					    theta = -theta;
+					}
+
+					if (abs(theta) > bigtheta) {
+					    t = .5f / theta;
+					    fastr[2] = t * d__[p] / d__[q];
+					    fastr[3] = -t * d__[q] / d__[p];
+					    srotm_(m, &a[p * a_dim1 + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1, fastr);
+					    if (rsvec) {
+			  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				  v_dim1 + 1], &c__1, fastr);
+					    }
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(t);
+					    mxsinj = f2cmax(r__1,r__2);
+					} else {
+
+
+					    thsign = -r_sign(&c_b42, &aapq);
+					    if (aaqq > aapp0) {
+			  thsign = -thsign;
+					    }
+					    t = 1.f / (theta + thsign * sqrt(
+						    theta * theta + 1.f));
+					    cs = sqrt(1.f / (t * t + 1.f));
+					    sn = t * cs;
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(sn);
+					    mxsinj = f2cmax(r__1,r__2);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+
+					    apoaq = d__[p] / d__[q];
+					    aqoap = d__[q] / d__[p];
+					    if (d__[p] >= 1.f) {
+
+			  if (d__[q] >= 1.f) {
+			      fastr[2] = t * apoaq;
+			      fastr[3] = -t * aqoap;
+			      d__[p] *= cs;
+			      d__[q] *= cs;
+			      srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
+				      a_dim1 + 1], &c__1, fastr);
+			      if (rsvec) {
+				  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
+					  q * v_dim1 + 1], &c__1, fastr);
+			      }
+			  } else {
+			      r__1 = -t * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      r__1 = cs * sn * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  r__1 = -t * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+			      }
+			      d__[p] *= cs;
+			      d__[q] /= cs;
+			  }
+					    } else {
+			  if (d__[q] >= 1.f) {
+			      r__1 = t * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      r__1 = -cs * sn * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  r__1 = t * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+			      }
+			      d__[p] /= cs;
+			      d__[q] *= cs;
+			  } else {
+			      if (d__[p] >= d__[q]) {
+				  r__1 = -t * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  d__[p] *= cs;
+				  d__[q] /= cs;
+				  if (rsvec) {
+				      r__1 = -t * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				      r__1 = cs * sn * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				  }
+			      } else {
+				  r__1 = t * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  d__[p] /= cs;
+				  d__[q] *= cs;
+				  if (rsvec) {
+				      r__1 = t * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				      r__1 = -cs * sn * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				  }
+			      }
+			  }
+					    }
+					}
+
+				    } else {
+					if (aapp > aaqq) {
+					    scopy_(m, &a[p * a_dim1 + 1], &
+						    c__1, &work[1], &c__1);
+					    slascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b42, m, &c__1, &work[1]
+						    , lda, &ierr);
+					    slascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b42, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * d__[p] / d__[q];
+					    saxpy_(m, &temp1, &work[1], &c__1,
+						     &a[q * a_dim1 + 1], &
+						    c__1);
+					    slascl_("G", &c__0, &c__0, &c_b42,
+						     &aaqq, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - aapq * 
+						    aapq;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+					    mxsinj = f2cmax(mxsinj,*sfmin);
+					} else {
+					    scopy_(m, &a[q * a_dim1 + 1], &
+						    c__1, &work[1], &c__1);
+					    slascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b42, m, &c__1, &work[1]
+						    , lda, &ierr);
+					    slascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b42, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * d__[q] / d__[p];
+					    saxpy_(m, &temp1, &work[1], &c__1,
+						     &a[p * a_dim1 + 1], &
+						    c__1);
+					    slascl_("G", &c__0, &c__0, &c_b42,
+						     &aapp, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - aapq * 
+						    aapq;
+					    sva[p] = aapp * sqrt((f2cmax(r__1,
+						    r__2)));
+					    mxsinj = f2cmax(mxsinj,*sfmin);
+					}
+				    }
+/*           END IF ROTOK THEN ... ELSE */
+
+/*           In the case of cancellation in updating SVA(q) */
+/* Computing 2nd power */
+				    r__1 = sva[q] / aaqq;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aaqq < rootbig && aaqq > 
+						rootsfmin) {
+					    sva[q] = snrm2_(m, &a[q * a_dim1 
+						    + 1], &c__1) * d__[q];
+					} else {
+					    t = 0.f;
+					    aaqq = 1.f;
+					    slassq_(m, &a[q * a_dim1 + 1], &
+						    c__1, &t, &aaqq);
+					    sva[q] = t * sqrt(aaqq) * d__[q];
+					}
+				    }
+/* Computing 2nd power */
+				    r__1 = aapp / aapp0;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aapp < rootbig && aapp > 
+						rootsfmin) {
+					    aapp = snrm2_(m, &a[p * a_dim1 + 
+						    1], &c__1) * d__[p];
+					} else {
+					    t = 0.f;
+					    aapp = 1.f;
+					    slassq_(m, &a[p * a_dim1 + 1], &
+						    c__1, &t, &aapp);
+					    aapp = t * sqrt(aapp) * d__[p];
+					}
+					sva[p] = aapp;
+				    }
+/*              end of OK rotation */
+				} else {
+				    ++notrot;
+				    ++pskipped;
+				    ++ijblsk;
+				}
+			    } else {
+				++notrot;
+				++pskipped;
+				++ijblsk;
+			    }
+
+			    if (i__ <= swband && ijblsk >= blskip) {
+				sva[p] = aapp;
+				notrot = 0;
+				goto L2011;
+			    }
+			    if (i__ <= swband && pskipped > rowskip) {
+				aapp = -aapp;
+				notrot = 0;
+				goto L2203;
+			    }
+
+/* L2200: */
+			}
+/*        end of the q-loop */
+L2203:
+
+			sva[p] = aapp;
+
+		    } else {
+			if (aapp == 0.f) {
+/* Computing MIN */
+			    i__5 = jgl + kbl - 1;
+			    notrot = notrot + f2cmin(i__5,*n) - jgl + 1;
+			}
+			if (aapp < 0.f) {
+			    notrot = 0;
+			}
+		    }
+/* L2100: */
+		}
+/*     end of the p-loop */
+/* L2010: */
+	    }
+/*     end of the jbc-loop */
+L2011:
+/* 2011 bailed out of the jbc-loop */
+/* Computing MIN */
+	    i__4 = igl + kbl - 1;
+	    i__3 = f2cmin(i__4,*n);
+	    for (p = igl; p <= i__3; ++p) {
+		sva[p] = (r__1 = sva[p], abs(r__1));
+/* L2012: */
+	    }
+
+/* L2000: */
+	}
+/* 2000 :: end of the ibr-loop */
+
+	if (sva[*n] < rootbig && sva[*n] > rootsfmin) {
+	    sva[*n] = snrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n];
+	} else {
+	    t = 0.f;
+	    aapp = 1.f;
+	    slassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp);
+	    sva[*n] = t * sqrt(aapp) * d__[*n];
+	}
+
+/*     Additional steering devices */
+
+	if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) {
+	    swband = i__;
+	}
+
+	if (i__ > swband + 1 && mxaapq < (real) (*n) * *tol && (real) (*n) * 
+		mxaapq * mxsinj < *tol) {
+	    goto L1994;
+	}
+
+	if (notrot >= emptsw) {
+	    goto L1994;
+	}
+/* L1993: */
+    }
+/*     end i=1:NSWEEP loop */
+/* #:) Reaching this point means that the procedure has completed the given */
+/*     number of iterations. */
+    *info = *nsweep - 1;
+    goto L1995;
+L1994:
+/* #:) Reaching this point means that during the i-th sweep all pivots were */
+/*     below the given tolerance, causing early exit. */
+
+    *info = 0;
+/* #:) INFO = 0 confirms successful iterations. */
+L1995:
+
+/*     Sort the vector D. */
+    i__1 = *n - 1;
+    for (p = 1; p <= i__1; ++p) {
+	i__2 = *n - p + 1;
+	q = isamax_(&i__2, &sva[p], &c__1) + p - 1;
+	if (p != q) {
+	    temp1 = sva[p];
+	    sva[p] = sva[q];
+	    sva[q] = temp1;
+	    temp1 = d__[p];
+	    d__[p] = d__[q];
+	    d__[q] = temp1;
+	    sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1);
+	    if (rsvec) {
+		sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &
+			c__1);
+	    }
+	}
+/* L5991: */
+    }
+
+    return 0;
+} /* sgsvj0_ */
+
diff --git a/lapack-netlib/SRC/sgsvj1.c b/lapack-netlib/SRC/sgsvj1.c
new file mode 100644
index 000000000..0516e5b31
--- /dev/null
+++ b/lapack-netlib/SRC/sgsvj1.c
@@ -0,0 +1,1234 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static real c_b35 = 1.f;
+
+/* > \brief \b SGSVJ1 pre-processor for the routine sgesvj, applies Jacobi rotations targeting only particular
+ pivots. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGSVJ1 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgsvj1.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgsvj1.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgsvj1.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, */
+/*                          EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */
+
+/*       REAL               EPS, SFMIN, TOL */
+/*       INTEGER            INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP */
+/*       CHARACTER*1        JOBV */
+/*       REAL               A( LDA, * ), D( N ), SVA( N ), V( LDV, * ), */
+/*      $                   WORK( LWORK ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGSVJ1 is called from SGESVJ as a pre-processor and that is its main */
+/* > purpose. It applies Jacobi rotations in the same way as SGESVJ does, but */
+/* > it targets only particular pivots and it does not check convergence */
+/* > (stopping criterion). Few tunning parameters (marked by [TP]) are */
+/* > available for the implementer. */
+/* > */
+/* > Further Details */
+/* > ~~~~~~~~~~~~~~~ */
+/* > SGSVJ1 applies few sweeps of Jacobi rotations in the column space of */
+/* > the input M-by-N matrix A. The pivot pairs are taken from the (1,2) */
+/* > off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The */
+/* > block-entries (tiles) of the (1,2) off-diagonal block are marked by the */
+/* > [x]'s in the following scheme: */
+/* > */
+/* >    | *  *  * [x] [x] [x]| */
+/* >    | *  *  * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks. */
+/* >    | *  *  * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block. */
+/* >    |[x] [x] [x] *  *  * | */
+/* >    |[x] [x] [x] *  *  * | */
+/* >    |[x] [x] [x] *  *  * | */
+/* > */
+/* > In terms of the columns of A, the first N1 columns are rotated 'against' */
+/* > the remaining N-N1 columns, trying to increase the angle between the */
+/* > corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is */
+/* > tiled using quadratic tiles of side KBL. Here, KBL is a tunning parameter. */
+/* > The number of sweeps is given in NSWEEP and the orthogonality threshold */
+/* > is given in TOL. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          Specifies whether the output from this procedure is used */
+/* >          to compute the matrix V: */
+/* >          = 'V': the product of the Jacobi rotations is accumulated */
+/* >                 by postmulyiplying the N-by-N array V. */
+/* >                (See the description of V.) */
+/* >          = 'A': the product of the Jacobi rotations is accumulated */
+/* >                 by postmulyiplying the MV-by-N array V. */
+/* >                (See the descriptions of MV and V.) */
+/* >          = 'N': the Jacobi rotations are not accumulated. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A. */
+/* >          M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N1 */
+/* > \verbatim */
+/* >          N1 is INTEGER */
+/* >          N1 specifies the 2 x 2 block partition, the first N1 columns are */
+/* >          rotated 'against' the remaining N-N1 columns of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >          On entry, M-by-N matrix A, such that A*diag(D) represents */
+/* >          the input matrix. */
+/* >          On exit, */
+/* >          A_onexit * D_onexit represents the input matrix A*diag(D) */
+/* >          post-multiplied by a sequence of Jacobi rotations, where the */
+/* >          rotation threshold and the total number of sweeps are given in */
+/* >          TOL and NSWEEP, respectively. */
+/* >          (See the descriptions of N1, D, TOL and NSWEEP.) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          The array D accumulates the scaling factors from the fast scaled */
+/* >          Jacobi rotations. */
+/* >          On entry, A*diag(D) represents the input matrix. */
+/* >          On exit, A_onexit*diag(D_onexit) represents the input matrix */
+/* >          post-multiplied by a sequence of Jacobi rotations, where the */
+/* >          rotation threshold and the total number of sweeps are given in */
+/* >          TOL and NSWEEP, respectively. */
+/* >          (See the descriptions of N1, A, TOL and NSWEEP.) */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] SVA */
+/* > \verbatim */
+/* >          SVA is REAL array, dimension (N) */
+/* >          On entry, SVA contains the Euclidean norms of the columns of */
+/* >          the matrix A*diag(D). */
+/* >          On exit, SVA contains the Euclidean norms of the columns of */
+/* >          the matrix onexit*diag(D_onexit). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MV */
+/* > \verbatim */
+/* >          MV is INTEGER */
+/* >          If JOBV = 'A', then MV rows of V are post-multipled by a */
+/* >                           sequence of Jacobi rotations. */
+/* >          If JOBV = 'N',   then MV is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V */
+/* > \verbatim */
+/* >          V is REAL array, dimension (LDV,N) */
+/* >          If JOBV = 'V' then N rows of V are post-multipled by a */
+/* >                           sequence of Jacobi rotations. */
+/* >          If JOBV = 'A' then MV rows of V are post-multipled by a */
+/* >                           sequence of Jacobi rotations. */
+/* >          If JOBV = 'N',   then V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V,  LDV >= 1. */
+/* >          If JOBV = 'V', LDV >= N. */
+/* >          If JOBV = 'A', LDV >= MV. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] EPS */
+/* > \verbatim */
+/* >          EPS is REAL */
+/* >          EPS = SLAMCH('Epsilon') */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SFMIN */
+/* > \verbatim */
+/* >          SFMIN is REAL */
+/* >          SFMIN = SLAMCH('Safe Minimum') */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TOL */
+/* > \verbatim */
+/* >          TOL is REAL */
+/* >          TOL is the threshold for Jacobi rotations. For a pair */
+/* >          A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */
+/* >          applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NSWEEP */
+/* > \verbatim */
+/* >          NSWEEP is INTEGER */
+/* >          NSWEEP is the number of sweeps of Jacobi rotations to be */
+/* >          performed. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (LWORK) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          LWORK is the dimension of WORK. LWORK >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, then the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgsvj1_(char *jobv, integer *m, integer *n, integer *n1, 
+	real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, 
+	integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep, 
+	real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, 
+	    i__6;
+    real r__1, r__2;
+
+    /* Local variables */
+    integer nblc;
+    real aapp, aapq, aaqq;
+    integer nblr, ierr;
+    real bigtheta;
+    extern real sdot_(integer *, real *, integer *, real *, integer *);
+    integer pskipped;
+    real aapp0, temp1;
+    extern real snrm2_(integer *, real *, integer *);
+    integer i__, p, q;
+    real t, large, apoaq, aqoap;
+    extern logical lsame_(char *, char *);
+    real theta, small, fastr[5];
+    logical applv, rsvec;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    logical rotok;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
+	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
+	    integer *), srotm_(integer *, real *, integer *, real *, integer *
+	    , real *);
+    real rootsfmin, cs, sn;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    integer ijblsk, swband;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    integer blskip;
+    real mxaapq, thsign;
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 
+	    real *);
+    real mxsinj;
+    integer emptsw, notrot, iswrot, jbc;
+    real big;
+    integer kbl, igl, ibr, jgl, mvl;
+    real rootbig, rooteps;
+    integer rowskip;
+    real roottol;
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    --sva;
+    --d__;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --work;
+
+    /* Function Body */
+    applv = lsame_(jobv, "A");
+    rsvec = lsame_(jobv, "V");
+    if (! (rsvec || applv || lsame_(jobv, "N"))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0 || *n > *m) {
+	*info = -3;
+    } else if (*n1 < 0) {
+	*info = -4;
+    } else if (*lda < *m) {
+	*info = -6;
+    } else if ((rsvec || applv) && *mv < 0) {
+	*info = -9;
+    } else if (rsvec && *ldv < *n || applv && *ldv < *mv) {
+	*info = -11;
+    } else if (*tol <= *eps) {
+	*info = -14;
+    } else if (*nsweep < 0) {
+	*info = -15;
+    } else if (*lwork < *m) {
+	*info = -17;
+    } else {
+	*info = 0;
+    }
+
+/*     #:( */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGSVJ1", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    if (rsvec) {
+	mvl = *n;
+    } else if (applv) {
+	mvl = *mv;
+    }
+    rsvec = rsvec || applv;
+    rooteps = sqrt(*eps);
+    rootsfmin = sqrt(*sfmin);
+    small = *sfmin / *eps;
+    big = 1.f / *sfmin;
+    rootbig = 1.f / rootsfmin;
+    large = big / sqrt((real) (*m * *n));
+    bigtheta = 1.f / rooteps;
+    roottol = sqrt(*tol);
+
+
+/*     RSVEC = LSAME( JOBV, 'Y' ) */
+
+    emptsw = *n1 * (*n - *n1);
+    notrot = 0;
+    fastr[0] = 0.f;
+
+
+    kbl = f2cmin(8,*n);
+    nblr = *n1 / kbl;
+    if (nblr * kbl != *n1) {
+	++nblr;
+    }
+    nblc = (*n - *n1) / kbl;
+    if (nblc * kbl != *n - *n1) {
+	++nblc;
+    }
+/* Computing 2nd power */
+    i__1 = kbl;
+    blskip = i__1 * i__1 + 1;
+/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */
+    rowskip = f2cmin(5,kbl);
+/* [TP] ROWSKIP is a tuning parameter. */
+    swband = 0;
+/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */
+/*     if SGESVJ is used as a computational routine in the preconditioned */
+/*     Jacobi SVD algorithm SGESVJ. */
+
+
+/*     | *   *   * [x] [x] [x]| */
+/*     | *   *   * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks. */
+/*     | *   *   * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block. */
+/*     |[x] [x] [x] *   *   * | */
+/*     |[x] [x] [x] *   *   * | */
+/*     |[x] [x] [x] *   *   * | */
+
+
+    i__1 = *nsweep;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+	mxaapq = 0.f;
+	mxsinj = 0.f;
+	iswrot = 0;
+
+	notrot = 0;
+	pskipped = 0;
+
+	i__2 = nblr;
+	for (ibr = 1; ibr <= i__2; ++ibr) {
+	    igl = (ibr - 1) * kbl + 1;
+
+
+/* ........................................................ */
+/* ... go to the off diagonal blocks */
+	    igl = (ibr - 1) * kbl + 1;
+	    i__3 = nblc;
+	    for (jbc = 1; jbc <= i__3; ++jbc) {
+		jgl = *n1 + (jbc - 1) * kbl + 1;
+/*        doing the block at ( ibr, jbc ) */
+		ijblsk = 0;
+/* Computing MIN */
+		i__5 = igl + kbl - 1;
+		i__4 = f2cmin(i__5,*n1);
+		for (p = igl; p <= i__4; ++p) {
+		    aapp = sva[p];
+		    if (aapp > 0.f) {
+			pskipped = 0;
+/* Computing MIN */
+			i__6 = jgl + kbl - 1;
+			i__5 = f2cmin(i__6,*n);
+			for (q = jgl; q <= i__5; ++q) {
+
+			    aaqq = sva[q];
+			    if (aaqq > 0.f) {
+				aapp0 = aapp;
+
+
+
+				if (aaqq >= 1.f) {
+				    if (aapp >= aaqq) {
+					rotok = small * aapp <= aaqq;
+				    } else {
+					rotok = small * aaqq <= aapp;
+				    }
+				    if (aapp < big / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * d__[p] * d__[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[1], &c__1);
+					slascl_("G", &c__0, &c__0, &aapp, &
+						d__[p], m, &c__1, &work[1], 
+						lda, &ierr);
+					aapq = sdot_(m, &work[1], &c__1, &a[q 
+						* a_dim1 + 1], &c__1) * d__[q]
+						 / aaqq;
+				    }
+				} else {
+				    if (aapp >= aaqq) {
+					rotok = aapp <= aaqq / small;
+				    } else {
+					rotok = aaqq <= aapp / small;
+				    }
+				    if (aapp > small / aaqq) {
+					aapq = sdot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * d__[p] * d__[q] / 
+						aaqq / aapp;
+				    } else {
+					scopy_(m, &a[q * a_dim1 + 1], &c__1, &
+						work[1], &c__1);
+					slascl_("G", &c__0, &c__0, &aaqq, &
+						d__[q], m, &c__1, &work[1], 
+						lda, &ierr);
+					aapq = sdot_(m, &work[1], &c__1, &a[p 
+						* a_dim1 + 1], &c__1) * d__[p]
+						 / aapp;
+				    }
+				}
+/* Computing MAX */
+				r__1 = mxaapq, r__2 = abs(aapq);
+				mxaapq = f2cmax(r__1,r__2);
+/*        TO rotate or NOT to rotate, THAT is the question ... */
+
+				if (abs(aapq) > *tol) {
+				    notrot = 0;
+/*           ROTATED  = ROTATED + 1 */
+				    pskipped = 0;
+				    ++iswrot;
+
+				    if (rotok) {
+
+					aqoap = aaqq / aapp;
+					apoaq = aapp / aaqq;
+					theta = (r__1 = aqoap - apoaq, abs(
+						r__1)) * -.5f / aapq;
+					if (aaqq > aapp0) {
+					    theta = -theta;
+					}
+					if (abs(theta) > bigtheta) {
+					    t = .5f / theta;
+					    fastr[2] = t * d__[p] / d__[q];
+					    fastr[3] = -t * d__[q] / d__[p];
+					    srotm_(m, &a[p * a_dim1 + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1, fastr);
+					    if (rsvec) {
+			  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				  v_dim1 + 1], &c__1, fastr);
+					    }
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(t);
+					    mxsinj = f2cmax(r__1,r__2);
+					} else {
+
+
+					    thsign = -r_sign(&c_b35, &aapq);
+					    if (aaqq > aapp0) {
+			  thsign = -thsign;
+					    }
+					    t = 1.f / (theta + thsign * sqrt(
+						    theta * theta + 1.f));
+					    cs = sqrt(1.f / (t * t + 1.f));
+					    sn = t * cs;
+/* Computing MAX */
+					    r__1 = mxsinj, r__2 = abs(sn);
+					    mxsinj = f2cmax(r__1,r__2);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = t * apoaq * 
+						    aapq + 1.f;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - t * 
+						    aqoap * aapq;
+					    aapp *= sqrt((f2cmax(r__1,r__2)));
+					    apoaq = d__[p] / d__[q];
+					    aqoap = d__[q] / d__[p];
+					    if (d__[p] >= 1.f) {
+
+			  if (d__[q] >= 1.f) {
+			      fastr[2] = t * apoaq;
+			      fastr[3] = -t * aqoap;
+			      d__[p] *= cs;
+			      d__[q] *= cs;
+			      srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
+				      a_dim1 + 1], &c__1, fastr);
+			      if (rsvec) {
+				  srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
+					  q * v_dim1 + 1], &c__1, fastr);
+			      }
+			  } else {
+			      r__1 = -t * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      r__1 = cs * sn * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  r__1 = -t * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+			      }
+			      d__[p] *= cs;
+			      d__[q] /= cs;
+			  }
+					    } else {
+			  if (d__[q] >= 1.f) {
+			      r__1 = t * apoaq;
+			      saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      r__1 = -cs * sn * aqoap;
+			      saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  r__1 = t * apoaq;
+				  saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+			      }
+			      d__[p] /= cs;
+			      d__[q] *= cs;
+			  } else {
+			      if (d__[p] >= d__[q]) {
+				  r__1 = -t * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  r__1 = cs * sn * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  d__[p] *= cs;
+				  d__[q] /= cs;
+				  if (rsvec) {
+				      r__1 = -t * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				      r__1 = cs * sn * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				  }
+			      } else {
+				  r__1 = t * apoaq;
+				  saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  r__1 = -cs * sn * aqoap;
+				  saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  d__[p] /= cs;
+				  d__[q] *= cs;
+				  if (rsvec) {
+				      r__1 = t * apoaq;
+				      saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				      r__1 = -cs * sn * aqoap;
+				      saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				  }
+			      }
+			  }
+					    }
+					}
+				    } else {
+					if (aapp > aaqq) {
+					    scopy_(m, &a[p * a_dim1 + 1], &
+						    c__1, &work[1], &c__1);
+					    slascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b35, m, &c__1, &work[1]
+						    , lda, &ierr);
+					    slascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b35, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * d__[p] / d__[q];
+					    saxpy_(m, &temp1, &work[1], &c__1,
+						     &a[q * a_dim1 + 1], &
+						    c__1);
+					    slascl_("G", &c__0, &c__0, &c_b35,
+						     &aaqq, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - aapq * 
+						    aapq;
+					    sva[q] = aaqq * sqrt((f2cmax(r__1,
+						    r__2)));
+					    mxsinj = f2cmax(mxsinj,*sfmin);
+					} else {
+					    scopy_(m, &a[q * a_dim1 + 1], &
+						    c__1, &work[1], &c__1);
+					    slascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b35, m, &c__1, &work[1]
+						    , lda, &ierr);
+					    slascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b35, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * d__[q] / d__[p];
+					    saxpy_(m, &temp1, &work[1], &c__1,
+						     &a[p * a_dim1 + 1], &
+						    c__1);
+					    slascl_("G", &c__0, &c__0, &c_b35,
+						     &aapp, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    r__1 = 0.f, r__2 = 1.f - aapq * 
+						    aapq;
+					    sva[p] = aapp * sqrt((f2cmax(r__1,
+						    r__2)));
+					    mxsinj = f2cmax(mxsinj,*sfmin);
+					}
+				    }
+/*           END IF ROTOK THEN ... ELSE */
+
+/*           In the case of cancellation in updating SVA(q) */
+/* Computing 2nd power */
+				    r__1 = sva[q] / aaqq;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aaqq < rootbig && aaqq > 
+						rootsfmin) {
+					    sva[q] = snrm2_(m, &a[q * a_dim1 
+						    + 1], &c__1) * d__[q];
+					} else {
+					    t = 0.f;
+					    aaqq = 1.f;
+					    slassq_(m, &a[q * a_dim1 + 1], &
+						    c__1, &t, &aaqq);
+					    sva[q] = t * sqrt(aaqq) * d__[q];
+					}
+				    }
+/* Computing 2nd power */
+				    r__1 = aapp / aapp0;
+				    if (r__1 * r__1 <= rooteps) {
+					if (aapp < rootbig && aapp > 
+						rootsfmin) {
+					    aapp = snrm2_(m, &a[p * a_dim1 + 
+						    1], &c__1) * d__[p];
+					} else {
+					    t = 0.f;
+					    aapp = 1.f;
+					    slassq_(m, &a[p * a_dim1 + 1], &
+						    c__1, &t, &aapp);
+					    aapp = t * sqrt(aapp) * d__[p];
+					}
+					sva[p] = aapp;
+				    }
+/*              end of OK rotation */
+				} else {
+				    ++notrot;
+/*           SKIPPED  = SKIPPED  + 1 */
+				    ++pskipped;
+				    ++ijblsk;
+				}
+			    } else {
+				++notrot;
+				++pskipped;
+				++ijblsk;
+			    }
+/*      IF ( NOTROT .GE. EMPTSW )  GO TO 2011 */
+			    if (i__ <= swband && ijblsk >= blskip) {
+				sva[p] = aapp;
+				notrot = 0;
+				goto L2011;
+			    }
+			    if (i__ <= swband && pskipped > rowskip) {
+				aapp = -aapp;
+				notrot = 0;
+				goto L2203;
+			    }
+
+/* L2200: */
+			}
+/*        end of the q-loop */
+L2203:
+			sva[p] = aapp;
+
+		    } else {
+			if (aapp == 0.f) {
+/* Computing MIN */
+			    i__5 = jgl + kbl - 1;
+			    notrot = notrot + f2cmin(i__5,*n) - jgl + 1;
+			}
+			if (aapp < 0.f) {
+			    notrot = 0;
+			}
+/* **      IF ( NOTROT .GE. EMPTSW )  GO TO 2011 */
+		    }
+/* L2100: */
+		}
+/*     end of the p-loop */
+/* L2010: */
+	    }
+/*     end of the jbc-loop */
+L2011:
+/* 2011 bailed out of the jbc-loop */
+/* Computing MIN */
+	    i__4 = igl + kbl - 1;
+	    i__3 = f2cmin(i__4,*n);
+	    for (p = igl; p <= i__3; ++p) {
+		sva[p] = (r__1 = sva[p], abs(r__1));
+/* L2012: */
+	    }
+/* **   IF ( NOTROT .GE. EMPTSW ) GO TO 1994 */
+/* L2000: */
+	}
+/* 2000 :: end of the ibr-loop */
+
+	if (sva[*n] < rootbig && sva[*n] > rootsfmin) {
+	    sva[*n] = snrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n];
+	} else {
+	    t = 0.f;
+	    aapp = 1.f;
+	    slassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp);
+	    sva[*n] = t * sqrt(aapp) * d__[*n];
+	}
+
+/*     Additional steering devices */
+
+	if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) {
+	    swband = i__;
+	}
+	if (i__ > swband + 1 && mxaapq < (real) (*n) * *tol && (real) (*n) * 
+		mxaapq * mxsinj < *tol) {
+	    goto L1994;
+	}
+
+	if (notrot >= emptsw) {
+	    goto L1994;
+	}
+/* L1993: */
+    }
+/*     end i=1:NSWEEP loop */
+/* #:) Reaching this point means that the procedure has completed the given */
+/*     number of sweeps. */
+    *info = *nsweep - 1;
+    goto L1995;
+L1994:
+/* #:) Reaching this point means that during the i-th sweep all pivots were */
+/*     below the given threshold, causing early exit. */
+    *info = 0;
+/* #:) INFO = 0 confirms successful iterations. */
+L1995:
+
+/*     Sort the vector D */
+
+    i__1 = *n - 1;
+    for (p = 1; p <= i__1; ++p) {
+	i__2 = *n - p + 1;
+	q = isamax_(&i__2, &sva[p], &c__1) + p - 1;
+	if (p != q) {
+	    temp1 = sva[p];
+	    sva[p] = sva[q];
+	    sva[q] = temp1;
+	    temp1 = d__[p];
+	    d__[p] = d__[q];
+	    d__[q] = temp1;
+	    sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1);
+	    if (rsvec) {
+		sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &
+			c__1);
+	    }
+	}
+/* L5991: */
+    }
+
+    return 0;
+} /* sgsvj1_ */
+
diff --git a/lapack-netlib/SRC/sgtcon.c b/lapack-netlib/SRC/sgtcon.c
new file mode 100644
index 000000000..32e36623b
--- /dev/null
+++ b/lapack-netlib/SRC/sgtcon.c
@@ -0,0 +1,649 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SGTCON */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGTCON + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtcon.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtcon.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtcon.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, */
+/*                          WORK, IWORK, INFO ) */
+
+/*       CHARACTER          NORM */
+/*       INTEGER            INFO, N */
+/*       REAL               ANORM, RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGTCON estimates the reciprocal of the condition number of a real */
+/* > tridiagonal matrix A using the LU factorization as computed by */
+/* > SGTTRF. */
+/* > */
+/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
+/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] NORM */
+/* > \verbatim */
+/* >          NORM is CHARACTER*1 */
+/* >          Specifies whether the 1-norm condition number or the */
+/* >          infinity-norm condition number is required: */
+/* >          = '1' or 'O':  1-norm; */
+/* >          = 'I':         Infinity-norm. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DL */
+/* > \verbatim */
+/* >          DL is REAL array, dimension (N-1) */
+/* >          The (n-1) multipliers that define the matrix L from the */
+/* >          LU factorization of A as computed by SGTTRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          The n diagonal elements of the upper triangular matrix U from */
+/* >          the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU */
+/* > \verbatim */
+/* >          DU is REAL array, dimension (N-1) */
+/* >          The (n-1) elements of the first superdiagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU2 */
+/* > \verbatim */
+/* >          DU2 is REAL array, dimension (N-2) */
+/* >          The (n-2) elements of the second superdiagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
+/* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
+/* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
+/* >          required. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ANORM */
+/* > \verbatim */
+/* >          ANORM is REAL */
+/* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
+/* >          If NORM = 'I', the infinity-norm of the original matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >          The reciprocal of the condition number of the matrix A, */
+/* >          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */
+/* >          estimate of the 1-norm of inv(A) computed in this routine. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (2*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGTcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgtcon_(char *norm, integer *n, real *dl, real *d__, 
+	real *du, real *du2, integer *ipiv, real *anorm, real *rcond, real *
+	work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    integer kase, kase1, i__;
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, 
+	    real *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
+    real ainvnm;
+    logical onenrm;
+    extern /* Subroutine */ int sgttrs_(char *, integer *, integer *, real *, 
+	    real *, real *, real *, integer *, real *, integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments */
+    --iwork;
+    --work;
+    --ipiv;
+    --du2;
+    --du;
+    --d__;
+    --dl;
+
+    /* Function Body */
+    *info = 0;
+    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
+    if (! onenrm && ! lsame_(norm, "I")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*anorm < 0.f) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGTCON", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    *rcond = 0.f;
+    if (*n == 0) {
+	*rcond = 1.f;
+	return 0;
+    } else if (*anorm == 0.f) {
+	return 0;
+    }
+
+/*     Check that D(1:N) is non-zero. */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (d__[i__] == 0.f) {
+	    return 0;
+	}
+/* L10: */
+    }
+
+    ainvnm = 0.f;
+    if (onenrm) {
+	kase1 = 1;
+    } else {
+	kase1 = 2;
+    }
+    kase = 0;
+L20:
+    slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
+    if (kase != 0) {
+	if (kase == kase1) {
+
+/*           Multiply by inv(U)*inv(L). */
+
+	    sgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1]
+		    , &ipiv[1], &work[1], n, info);
+	} else {
+
+/*           Multiply by inv(L**T)*inv(U**T). */
+
+	    sgttrs_("Transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1], &
+		    ipiv[1], &work[1], n, info);
+	}
+	goto L20;
+    }
+
+/*     Compute the estimate of the reciprocal condition number. */
+
+    if (ainvnm != 0.f) {
+	*rcond = 1.f / ainvnm / *anorm;
+    }
+
+    return 0;
+
+/*     End of SGTCON */
+
+} /* sgtcon_ */
+
diff --git a/lapack-netlib/SRC/sgtrfs.c b/lapack-netlib/SRC/sgtrfs.c
new file mode 100644
index 000000000..6135ed771
--- /dev/null
+++ b/lapack-netlib/SRC/sgtrfs.c
@@ -0,0 +1,913 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b18 = -1.f;
+static real c_b19 = 1.f;
+
+/* > \brief \b SGTRFS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGTRFS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtrfs.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtrfs.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtrfs.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, */
+/*                          IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, */
+/*                          INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDB, LDX, N, NRHS */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ), */
+/*      $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), */
+/*      $                   FERR( * ), WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGTRFS improves the computed solution to a system of linear */
+/* > equations when the coefficient matrix is tridiagonal, and provides */
+/* > error bounds and backward error estimates for the solution. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DL */
+/* > \verbatim */
+/* >          DL is REAL array, dimension (N-1) */
+/* >          The (n-1) subdiagonal elements of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          The diagonal elements of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU */
+/* > \verbatim */
+/* >          DU is REAL array, dimension (N-1) */
+/* >          The (n-1) superdiagonal elements of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DLF */
+/* > \verbatim */
+/* >          DLF is REAL array, dimension (N-1) */
+/* >          The (n-1) multipliers that define the matrix L from the */
+/* >          LU factorization of A as computed by SGTTRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DF */
+/* > \verbatim */
+/* >          DF is REAL array, dimension (N) */
+/* >          The n diagonal elements of the upper triangular matrix U from */
+/* >          the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DUF */
+/* > \verbatim */
+/* >          DUF is REAL array, dimension (N-1) */
+/* >          The (n-1) elements of the first superdiagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU2 */
+/* > \verbatim */
+/* >          DU2 is REAL array, dimension (N-2) */
+/* >          The (n-2) elements of the second superdiagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
+/* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
+/* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
+/* >          required. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          The right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (LDX,NRHS) */
+/* >          On entry, the solution matrix X, as computed by SGTTRS. */
+/* >          On exit, the improved solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is REAL array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is REAL array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (3*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/* > \par Internal Parameters: */
+/*  ========================= */
+/* > */
+/* > \verbatim */
+/* >  ITMAX is the maximum number of steps of iterative refinement. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGTcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgtrfs_(char *trans, integer *n, integer *nrhs, real *dl,
+	 real *d__, real *du, real *dlf, real *df, real *duf, real *du2, 
+	integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real *
+	ferr, real *berr, real *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2;
+    real r__1, r__2, r__3, r__4;
+
+    /* Local variables */
+    integer kase;
+    real safe1, safe2;
+    integer i__, j;
+    real s;
+    extern logical lsame_(char *, char *);
+    integer isave[3], count;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
+	    integer *), slacn2_(integer *, real *, real *, integer *, real *, 
+	    integer *, integer *);
+    extern real slamch_(char *);
+    integer nz;
+    real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), slagtm_(
+	    char *, integer *, integer *, real *, real *, real *, real *, 
+	    real *, integer *, real *, real *, integer *);
+    logical notran;
+    char transn[1], transt[1];
+    real lstres;
+    extern /* Subroutine */ int sgttrs_(char *, integer *, integer *, real *, 
+	    real *, real *, real *, integer *, real *, integer *, integer *);
+    real eps;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    --dl;
+    --d__;
+    --du;
+    --dlf;
+    --df;
+    --duf;
+    --du2;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -13;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -15;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGTRFS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    ferr[j] = 0.f;
+	    berr[j] = 0.f;
+/* L10: */
+	}
+	return 0;
+    }
+
+    if (notran) {
+	*(unsigned char *)transn = 'N';
+	*(unsigned char *)transt = 'T';
+    } else {
+	*(unsigned char *)transn = 'T';
+	*(unsigned char *)transt = 'N';
+    }
+
+/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */
+
+    nz = 4;
+    eps = slamch_("Epsilon");
+    safmin = slamch_("Safe minimum");
+    safe1 = nz * safmin;
+    safe2 = safe1 / eps;
+
+/*     Do for each right hand side */
+
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+
+	count = 1;
+	lstres = 3.f;
+L20:
+
+/*        Loop until stopping criterion is satisfied. */
+
+/*        Compute residual R = B - op(A) * X, */
+/*        where op(A) = A, A**T, or A**H, depending on TRANS. */
+
+	scopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
+	slagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x[j * 
+		x_dim1 + 1], ldx, &c_b19, &work[*n + 1], n);
+
+/*        Compute abs(op(A))*abs(x) + abs(b) for use in the backward */
+/*        error bound. */
+
+	if (notran) {
+	    if (*n == 1) {
+		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
+			1] * x[j * x_dim1 + 1], abs(r__2));
+	    } else {
+		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
+			1] * x[j * x_dim1 + 1], abs(r__2)) + (r__3 = du[1] * 
+			x[j * x_dim1 + 2], abs(r__3));
+		i__2 = *n - 1;
+		for (i__ = 2; i__ <= i__2; ++i__) {
+		    work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)) + (
+			    r__2 = dl[i__ - 1] * x[i__ - 1 + j * x_dim1], abs(
+			    r__2)) + (r__3 = d__[i__] * x[i__ + j * x_dim1], 
+			    abs(r__3)) + (r__4 = du[i__] * x[i__ + 1 + j * 
+			    x_dim1], abs(r__4));
+/* L30: */
+		}
+		work[*n] = (r__1 = b[*n + j * b_dim1], abs(r__1)) + (r__2 = 
+			dl[*n - 1] * x[*n - 1 + j * x_dim1], abs(r__2)) + (
+			r__3 = d__[*n] * x[*n + j * x_dim1], abs(r__3));
+	    }
+	} else {
+	    if (*n == 1) {
+		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
+			1] * x[j * x_dim1 + 1], abs(r__2));
+	    } else {
+		work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[
+			1] * x[j * x_dim1 + 1], abs(r__2)) + (r__3 = dl[1] * 
+			x[j * x_dim1 + 2], abs(r__3));
+		i__2 = *n - 1;
+		for (i__ = 2; i__ <= i__2; ++i__) {
+		    work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)) + (
+			    r__2 = du[i__ - 1] * x[i__ - 1 + j * x_dim1], abs(
+			    r__2)) + (r__3 = d__[i__] * x[i__ + j * x_dim1], 
+			    abs(r__3)) + (r__4 = dl[i__] * x[i__ + 1 + j * 
+			    x_dim1], abs(r__4));
+/* L40: */
+		}
+		work[*n] = (r__1 = b[*n + j * b_dim1], abs(r__1)) + (r__2 = 
+			du[*n - 1] * x[*n - 1 + j * x_dim1], abs(r__2)) + (
+			r__3 = d__[*n] * x[*n + j * x_dim1], abs(r__3));
+	    }
+	}
+
+/*        Compute componentwise relative backward error from formula */
+
+/*        f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */
+
+/*        where abs(Z) is the componentwise absolute value of the matrix */
+/*        or vector Z.  If the i-th component of the denominator is less */
+/*        than SAFE2, then SAFE1 is added to the i-th components of the */
+/*        numerator and denominator before dividing. */
+
+	s = 0.f;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+/* Computing MAX */
+		r__2 = s, r__3 = (r__1 = work[*n + i__], abs(r__1)) / work[
+			i__];
+		s = f2cmax(r__2,r__3);
+	    } else {
+/* Computing MAX */
+		r__2 = s, r__3 = ((r__1 = work[*n + i__], abs(r__1)) + safe1) 
+			/ (work[i__] + safe1);
+		s = f2cmax(r__2,r__3);
+	    }
+/* L50: */
+	}
+	berr[j] = s;
+
+/*        Test stopping criterion. Continue iterating if */
+/*           1) The residual BERR(J) is larger than machine epsilon, and */
+/*           2) BERR(J) decreased by at least a factor of 2 during the */
+/*              last iteration, and */
+/*           3) At most ITMAX iterations tried. */
+
+	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {
+
+/*           Update solution and try again. */
+
+	    sgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[
+		    1], &work[*n + 1], n, info);
+	    saxpy_(n, &c_b19, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
+		    ;
+	    lstres = berr[j];
+	    ++count;
+	    goto L20;
+	}
+
+/*        Bound error from formula */
+
+/*        norm(X - XTRUE) / norm(X) .le. FERR = */
+/*        norm( abs(inv(op(A)))* */
+/*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */
+
+/*        where */
+/*          norm(Z) is the magnitude of the largest component of Z */
+/*          inv(op(A)) is the inverse of op(A) */
+/*          abs(Z) is the componentwise absolute value of the matrix or */
+/*             vector Z */
+/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
+/*          EPS is machine epsilon */
+
+/*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
+/*        is incremented by SAFE1 if the i-th component of */
+/*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */
+
+/*        Use SLACN2 to estimate the infinity-norm of the matrix */
+/*           inv(op(A)) * diag(W), */
+/*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
+
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
+			work[i__];
+	    } else {
+		work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * 
+			work[i__] + safe1;
+	    }
+/* L60: */
+	}
+
+	kase = 0;
+L70:
+	slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
+		kase, isave);
+	if (kase != 0) {
+	    if (kase == 1) {
+
+/*              Multiply by diag(W)*inv(op(A)**T). */
+
+		sgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &
+			ipiv[1], &work[*n + 1], n, info);
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] = work[i__] * work[*n + i__];
+/* L80: */
+		}
+	    } else {
+
+/*              Multiply by inv(op(A))*diag(W). */
+
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] = work[i__] * work[*n + i__];
+/* L90: */
+		}
+		sgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &
+			ipiv[1], &work[*n + 1], n, info);
+	    }
+	    goto L70;
+	}
+
+/*        Normalize error. */
+
+	lstres = 0.f;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], abs(r__1));
+	    lstres = f2cmax(r__2,r__3);
+/* L100: */
+	}
+	if (lstres != 0.f) {
+	    ferr[j] /= lstres;
+	}
+
+/* L110: */
+    }
+
+    return 0;
+
+/*     End of SGTRFS */
+
+} /* sgtrfs_ */
+
diff --git a/lapack-netlib/SRC/sgtsv.c b/lapack-netlib/SRC/sgtsv.c
new file mode 100644
index 000000000..3bf62e8a0
--- /dev/null
+++ b/lapack-netlib/SRC/sgtsv.c
@@ -0,0 +1,745 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> SGTSV computes the solution to system of linear equations A * X = B for GT matrices </b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGTSV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtsv.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtsv.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtsv.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) */
+
+/*       INTEGER            INFO, LDB, N, NRHS */
+/*       REAL               B( LDB, * ), D( * ), DL( * ), DU( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGTSV  solves the equation */
+/* > */
+/* >    A*X = B, */
+/* > */
+/* > where A is an n by n tridiagonal matrix, by Gaussian elimination with */
+/* > partial pivoting. */
+/* > */
+/* > Note that the equation  A**T*X = B  may be solved by interchanging the */
+/* > order of the arguments DU and DL. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DL */
+/* > \verbatim */
+/* >          DL is REAL array, dimension (N-1) */
+/* >          On entry, DL must contain the (n-1) sub-diagonal elements of */
+/* >          A. */
+/* > */
+/* >          On exit, DL is overwritten by the (n-2) elements of the */
+/* >          second super-diagonal of the upper triangular matrix U from */
+/* >          the LU factorization of A, in DL(1), ..., DL(n-2). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          On entry, D must contain the diagonal elements of A. */
+/* > */
+/* >          On exit, D is overwritten by the n diagonal elements of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DU */
+/* > \verbatim */
+/* >          DU is REAL array, dimension (N-1) */
+/* >          On entry, DU must contain the (n-1) super-diagonal elements */
+/* >          of A. */
+/* > */
+/* >          On exit, DU is overwritten by the (n-1) elements of the first */
+/* >          super-diagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the N by NRHS matrix of right hand side matrix B. */
+/* >          On exit, if INFO = 0, the N by NRHS solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0: if INFO = i, U(i,i) is exactly zero, and the solution */
+/* >               has not been computed.  The factorization has not been */
+/* >               completed unless i = N. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGTsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgtsv_(integer *n, integer *nrhs, real *dl, real *d__, 
+	real *du, real *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    real fact, temp;
+    integer i__, j;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    --dl;
+    --d__;
+    --du;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*nrhs < 0) {
+	*info = -2;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGTSV ", &i__1, (ftnlen)5);
+	return 0;
+    }
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*nrhs == 1) {
+	i__1 = *n - 2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) {
+
+/*              No row interchange required */
+
+		if (d__[i__] != 0.f) {
+		    fact = dl[i__] / d__[i__];
+		    d__[i__ + 1] -= fact * du[i__];
+		    b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1];
+		} else {
+		    *info = i__;
+		    return 0;
+		}
+		dl[i__] = 0.f;
+	    } else {
+
+/*              Interchange rows I and I+1 */
+
+		fact = d__[i__] / dl[i__];
+		d__[i__] = dl[i__];
+		temp = d__[i__ + 1];
+		d__[i__ + 1] = du[i__] - fact * temp;
+		dl[i__] = du[i__ + 1];
+		du[i__ + 1] = -fact * dl[i__];
+		du[i__] = temp;
+		temp = b[i__ + b_dim1];
+		b[i__ + b_dim1] = b[i__ + 1 + b_dim1];
+		b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1];
+	    }
+/* L10: */
+	}
+	if (*n > 1) {
+	    i__ = *n - 1;
+	    if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) {
+		if (d__[i__] != 0.f) {
+		    fact = dl[i__] / d__[i__];
+		    d__[i__ + 1] -= fact * du[i__];
+		    b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1];
+		} else {
+		    *info = i__;
+		    return 0;
+		}
+	    } else {
+		fact = d__[i__] / dl[i__];
+		d__[i__] = dl[i__];
+		temp = d__[i__ + 1];
+		d__[i__ + 1] = du[i__] - fact * temp;
+		du[i__] = temp;
+		temp = b[i__ + b_dim1];
+		b[i__ + b_dim1] = b[i__ + 1 + b_dim1];
+		b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1];
+	    }
+	}
+	if (d__[*n] == 0.f) {
+	    *info = *n;
+	    return 0;
+	}
+    } else {
+	i__1 = *n - 2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) {
+
+/*              No row interchange required */
+
+		if (d__[i__] != 0.f) {
+		    fact = dl[i__] / d__[i__];
+		    d__[i__ + 1] -= fact * du[i__];
+		    i__2 = *nrhs;
+		    for (j = 1; j <= i__2; ++j) {
+			b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1];
+/* L20: */
+		    }
+		} else {
+		    *info = i__;
+		    return 0;
+		}
+		dl[i__] = 0.f;
+	    } else {
+
+/*              Interchange rows I and I+1 */
+
+		fact = d__[i__] / dl[i__];
+		d__[i__] = dl[i__];
+		temp = d__[i__ + 1];
+		d__[i__ + 1] = du[i__] - fact * temp;
+		dl[i__] = du[i__ + 1];
+		du[i__ + 1] = -fact * dl[i__];
+		du[i__] = temp;
+		i__2 = *nrhs;
+		for (j = 1; j <= i__2; ++j) {
+		    temp = b[i__ + j * b_dim1];
+		    b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1];
+		    b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * 
+			    b_dim1];
+/* L30: */
+		}
+	    }
+/* L40: */
+	}
+	if (*n > 1) {
+	    i__ = *n - 1;
+	    if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) {
+		if (d__[i__] != 0.f) {
+		    fact = dl[i__] / d__[i__];
+		    d__[i__ + 1] -= fact * du[i__];
+		    i__1 = *nrhs;
+		    for (j = 1; j <= i__1; ++j) {
+			b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1];
+/* L50: */
+		    }
+		} else {
+		    *info = i__;
+		    return 0;
+		}
+	    } else {
+		fact = d__[i__] / dl[i__];
+		d__[i__] = dl[i__];
+		temp = d__[i__ + 1];
+		d__[i__ + 1] = du[i__] - fact * temp;
+		du[i__] = temp;
+		i__1 = *nrhs;
+		for (j = 1; j <= i__1; ++j) {
+		    temp = b[i__ + j * b_dim1];
+		    b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1];
+		    b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * 
+			    b_dim1];
+/* L60: */
+		}
+	    }
+	}
+	if (d__[*n] == 0.f) {
+	    *info = *n;
+	    return 0;
+	}
+    }
+
+/*     Back solve with the matrix U from the factorization. */
+
+    if (*nrhs <= 2) {
+	j = 1;
+L70:
+	b[*n + j * b_dim1] /= d__[*n];
+	if (*n > 1) {
+	    b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[
+		    *n + j * b_dim1]) / d__[*n - 1];
+	}
+	for (i__ = *n - 2; i__ >= 1; --i__) {
+	    b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + 1 
+		    + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) / d__[
+		    i__];
+/* L80: */
+	}
+	if (j < *nrhs) {
+	    ++j;
+	    goto L70;
+	}
+    } else {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    b[*n + j * b_dim1] /= d__[*n];
+	    if (*n > 1) {
+		b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] 
+			* b[*n + j * b_dim1]) / d__[*n - 1];
+	    }
+	    for (i__ = *n - 2; i__ >= 1; --i__) {
+		b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ 
+			+ 1 + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1])
+			 / d__[i__];
+/* L90: */
+	    }
+/* L100: */
+	}
+    }
+
+    return 0;
+
+/*     End of SGTSV */
+
+} /* sgtsv_ */
+
diff --git a/lapack-netlib/SRC/sgtsvx.c b/lapack-netlib/SRC/sgtsvx.c
new file mode 100644
index 000000000..b0f304eb3
--- /dev/null
+++ b/lapack-netlib/SRC/sgtsvx.c
@@ -0,0 +1,828 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief <b> SGTSVX computes the solution to system of linear equations A * X = B for GT matrices </b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGTSVX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtsvx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtsvx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtsvx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, */
+/*                          DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, */
+/*                          WORK, IWORK, INFO ) */
+
+/*       CHARACTER          FACT, TRANS */
+/*       INTEGER            INFO, LDB, LDX, N, NRHS */
+/*       REAL               RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ), */
+/*      $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), */
+/*      $                   FERR( * ), WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGTSVX uses the LU factorization to compute the solution to a real */
+/* > system of linear equations A * X = B or A**T * X = B, */
+/* > where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */
+/* > matrices. */
+/* > */
+/* > Error bounds on the solution and a condition estimate are also */
+/* > provided. */
+/* > \endverbatim */
+
+/* > \par Description: */
+/*  ================= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The following steps are performed: */
+/* > */
+/* > 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */
+/* >    as A = L * U, where L is a product of permutation and unit lower */
+/* >    bidiagonal matrices and U is upper triangular with nonzeros in */
+/* >    only the main diagonal and first two superdiagonals. */
+/* > */
+/* > 2. If some U(i,i)=0, so that U is exactly singular, then the routine */
+/* >    returns with INFO = i. Otherwise, the factored form of A is used */
+/* >    to estimate the condition number of the matrix A.  If the */
+/* >    reciprocal of the condition number is less than machine precision, */
+/* >    INFO = N+1 is returned as a warning, but the routine still goes on */
+/* >    to solve for X and compute error bounds as described below. */
+/* > */
+/* > 3. The system of equations is solved for X using the factored form */
+/* >    of A. */
+/* > */
+/* > 4. Iterative refinement is applied to improve the computed solution */
+/* >    matrix and calculate error bounds and backward error estimates */
+/* >    for it. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] FACT */
+/* > \verbatim */
+/* >          FACT is CHARACTER*1 */
+/* >          Specifies whether or not the factored form of A has been */
+/* >          supplied on entry. */
+/* >          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored */
+/* >                  form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV */
+/* >                  will not be modified. */
+/* >          = 'N':  The matrix will be copied to DLF, DF, and DUF */
+/* >                  and factored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DL */
+/* > \verbatim */
+/* >          DL is REAL array, dimension (N-1) */
+/* >          The (n-1) subdiagonal elements of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          The n diagonal elements of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU */
+/* > \verbatim */
+/* >          DU is REAL array, dimension (N-1) */
+/* >          The (n-1) superdiagonal elements of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DLF */
+/* > \verbatim */
+/* >          DLF is REAL array, dimension (N-1) */
+/* >          If FACT = 'F', then DLF is an input argument and on entry */
+/* >          contains the (n-1) multipliers that define the matrix L from */
+/* >          the LU factorization of A as computed by SGTTRF. */
+/* > */
+/* >          If FACT = 'N', then DLF is an output argument and on exit */
+/* >          contains the (n-1) multipliers that define the matrix L from */
+/* >          the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DF */
+/* > \verbatim */
+/* >          DF is REAL array, dimension (N) */
+/* >          If FACT = 'F', then DF is an input argument and on entry */
+/* >          contains the n diagonal elements of the upper triangular */
+/* >          matrix U from the LU factorization of A. */
+/* > */
+/* >          If FACT = 'N', then DF is an output argument and on exit */
+/* >          contains the n diagonal elements of the upper triangular */
+/* >          matrix U from the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DUF */
+/* > \verbatim */
+/* >          DUF is REAL array, dimension (N-1) */
+/* >          If FACT = 'F', then DUF is an input argument and on entry */
+/* >          contains the (n-1) elements of the first superdiagonal of U. */
+/* > */
+/* >          If FACT = 'N', then DUF is an output argument and on exit */
+/* >          contains the (n-1) elements of the first superdiagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DU2 */
+/* > \verbatim */
+/* >          DU2 is REAL array, dimension (N-2) */
+/* >          If FACT = 'F', then DU2 is an input argument and on entry */
+/* >          contains the (n-2) elements of the second superdiagonal of */
+/* >          U. */
+/* > */
+/* >          If FACT = 'N', then DU2 is an output argument and on exit */
+/* >          contains the (n-2) elements of the second superdiagonal of */
+/* >          U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          If FACT = 'F', then IPIV is an input argument and on entry */
+/* >          contains the pivot indices from the LU factorization of A as */
+/* >          computed by SGTTRF. */
+/* > */
+/* >          If FACT = 'N', then IPIV is an output argument and on exit */
+/* >          contains the pivot indices from the LU factorization of A; */
+/* >          row i of the matrix was interchanged with row IPIV(i). */
+/* >          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */
+/* >          a row interchange was not required. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          The N-by-NRHS right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension (LDX,NRHS) */
+/* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >          The estimate of the reciprocal condition number of the matrix */
+/* >          A.  If RCOND is less than the machine precision (in */
+/* >          particular, if RCOND = 0), the matrix is singular to working */
+/* >          precision.  This condition is indicated by a return code of */
+/* >          INFO > 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is REAL array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is REAL array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (3*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, and i is */
+/* >                <= N:  U(i,i) is exactly zero.  The factorization */
+/* >                       has not been completed unless i = N, but the */
+/* >                       factor U is exactly singular, so the solution */
+/* >                       and error bounds could not be computed. */
+/* >                       RCOND = 0 is returned. */
+/* >                = N+1: U is nonsingular, but RCOND is less than machine */
+/* >                       precision, meaning that the matrix is singular */
+/* >                       to working precision.  Nevertheless, the */
+/* >                       solution and error bounds are computed because */
+/* >                       there are a number of situations where the */
+/* >                       computed solution can be more accurate than the */
+/* >                       value of RCOND would suggest. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGTsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgtsvx_(char *fact, char *trans, integer *n, integer *
+	nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, 
+	real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer *
+	ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, 
+	integer *info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, x_dim1, x_offset, i__1;
+
+    /* Local variables */
+    char norm[1];
+    extern logical lsame_(char *, char *);
+    real anorm;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    extern real slamch_(char *);
+    logical nofact;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern real slangt_(char *, integer *, real *, real *, real *);
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
+	    integer *, real *, integer *), sgtcon_(char *, integer *, 
+	    real *, real *, real *, real *, integer *, real *, real *, real *,
+	     integer *, integer *);
+    logical notran;
+    extern /* Subroutine */ int sgtrfs_(char *, integer *, integer *, real *, 
+	    real *, real *, real *, real *, real *, real *, integer *, real *,
+	     integer *, real *, integer *, real *, real *, real *, integer *, 
+	    integer *), sgttrf_(integer *, real *, real *, real *, 
+	    real *, integer *, integer *), sgttrs_(char *, integer *, integer 
+	    *, real *, real *, real *, real *, integer *, real *, integer *, 
+	    integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    --dl;
+    --d__;
+    --du;
+    --dlf;
+    --df;
+    --duf;
+    --du2;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    nofact = lsame_(fact, "N");
+    notran = lsame_(trans, "N");
+    if (! nofact && ! lsame_(fact, "F")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && ! 
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -14;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -16;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGTSVX", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    if (nofact) {
+
+/*        Compute the LU factorization of A. */
+
+	scopy_(n, &d__[1], &c__1, &df[1], &c__1);
+	if (*n > 1) {
+	    i__1 = *n - 1;
+	    scopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1);
+	    i__1 = *n - 1;
+	    scopy_(&i__1, &du[1], &c__1, &duf[1], &c__1);
+	}
+	sgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info);
+
+/*        Return if INFO is non-zero. */
+
+	if (*info > 0) {
+	    *rcond = 0.f;
+	    return 0;
+	}
+    }
+
+/*     Compute the norm of the matrix A. */
+
+    if (notran) {
+	*(unsigned char *)norm = '1';
+    } else {
+	*(unsigned char *)norm = 'I';
+    }
+    anorm = slangt_(norm, n, &dl[1], &d__[1], &du[1]);
+
+/*     Compute the reciprocal of the condition number of A. */
+
+    sgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, 
+	    rcond, &work[1], &iwork[1], info);
+
+/*     Compute the solution vectors X. */
+
+    slacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
+    sgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[
+	    x_offset], ldx, info);
+
+/*     Use iterative refinement to improve the computed solutions and */
+/*     compute error bounds and backward error estimates for them. */
+
+    sgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1],
+	     &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1]
+	    , &berr[1], &work[1], &iwork[1], info);
+
+/*     Set INFO = N+1 if the matrix is singular to working precision. */
+
+    if (*rcond < slamch_("Epsilon")) {
+	*info = *n + 1;
+    }
+
+    return 0;
+
+/*     End of SGTSVX */
+
+} /* sgtsvx_ */
+
diff --git a/lapack-netlib/SRC/sgttrf.c b/lapack-netlib/SRC/sgttrf.c
new file mode 100644
index 000000000..ba43b3902
--- /dev/null
+++ b/lapack-netlib/SRC/sgttrf.c
@@ -0,0 +1,632 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGTTRF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGTTRF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgttrf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgttrf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgttrf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) */
+
+/*       INTEGER            INFO, N */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               D( * ), DL( * ), DU( * ), DU2( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGTTRF computes an LU factorization of a real tridiagonal matrix A */
+/* > using elimination with partial pivoting and row interchanges. */
+/* > */
+/* > The factorization has the form */
+/* >    A = L * U */
+/* > where L is a product of permutation and unit lower bidiagonal */
+/* > matrices and U is upper triangular with nonzeros in only the main */
+/* > diagonal and first two superdiagonals. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DL */
+/* > \verbatim */
+/* >          DL is REAL array, dimension (N-1) */
+/* >          On entry, DL must contain the (n-1) sub-diagonal elements of */
+/* >          A. */
+/* > */
+/* >          On exit, DL is overwritten by the (n-1) multipliers that */
+/* >          define the matrix L from the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          On entry, D must contain the diagonal elements of A. */
+/* > */
+/* >          On exit, D is overwritten by the n diagonal elements of the */
+/* >          upper triangular matrix U from the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] DU */
+/* > \verbatim */
+/* >          DU is REAL array, dimension (N-1) */
+/* >          On entry, DU must contain the (n-1) super-diagonal elements */
+/* >          of A. */
+/* > */
+/* >          On exit, DU is overwritten by the (n-1) elements of the first */
+/* >          super-diagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] DU2 */
+/* > \verbatim */
+/* >          DU2 is REAL array, dimension (N-2) */
+/* >          On exit, DU2 is overwritten by the (n-2) elements of the */
+/* >          second super-diagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
+/* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
+/* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
+/* >          required. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -k, the k-th argument had an illegal value */
+/* >          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, and division by zero will occur if it is used */
+/* >                to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGTcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgttrf_(integer *n, real *dl, real *d__, real *du, real *
+	du2, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    real fact, temp;
+    integer i__;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    --ipiv;
+    --du2;
+    --du;
+    --d__;
+    --dl;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+	i__1 = -(*info);
+	xerbla_("SGTTRF", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Initialize IPIV(i) = i and DU2(I) = 0 */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	ipiv[i__] = i__;
+/* L10: */
+    }
+    i__1 = *n - 2;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	du2[i__] = 0.f;
+/* L20: */
+    }
+
+    i__1 = *n - 2;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) {
+
+/*           No row interchange required, eliminate DL(I) */
+
+	    if (d__[i__] != 0.f) {
+		fact = dl[i__] / d__[i__];
+		dl[i__] = fact;
+		d__[i__ + 1] -= fact * du[i__];
+	    }
+	} else {
+
+/*           Interchange rows I and I+1, eliminate DL(I) */
+
+	    fact = d__[i__] / dl[i__];
+	    d__[i__] = dl[i__];
+	    dl[i__] = fact;
+	    temp = du[i__];
+	    du[i__] = d__[i__ + 1];
+	    d__[i__ + 1] = temp - fact * d__[i__ + 1];
+	    du2[i__] = du[i__ + 1];
+	    du[i__ + 1] = -fact * du[i__ + 1];
+	    ipiv[i__] = i__ + 1;
+	}
+/* L30: */
+    }
+    if (*n > 1) {
+	i__ = *n - 1;
+	if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) {
+	    if (d__[i__] != 0.f) {
+		fact = dl[i__] / d__[i__];
+		dl[i__] = fact;
+		d__[i__ + 1] -= fact * du[i__];
+	    }
+	} else {
+	    fact = d__[i__] / dl[i__];
+	    d__[i__] = dl[i__];
+	    dl[i__] = fact;
+	    temp = du[i__];
+	    du[i__] = d__[i__ + 1];
+	    d__[i__ + 1] = temp - fact * d__[i__ + 1];
+	    ipiv[i__] = i__ + 1;
+	}
+    }
+
+/*     Check for a zero on the diagonal of U. */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (d__[i__] == 0.f) {
+	    *info = i__;
+	    goto L50;
+	}
+/* L40: */
+    }
+L50:
+
+    return 0;
+
+/*     End of SGTTRF */
+
+} /* sgttrf_ */
+
diff --git a/lapack-netlib/SRC/sgttrs.c b/lapack-netlib/SRC/sgttrs.c
new file mode 100644
index 000000000..e616c7134
--- /dev/null
+++ b/lapack-netlib/SRC/sgttrs.c
@@ -0,0 +1,632 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+
+/* > \brief \b SGTTRS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGTTRS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgttrs.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgttrs.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgttrs.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, */
+/*                          INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGTTRS solves one of the systems of equations */
+/* >    A*X = B  or  A**T*X = B, */
+/* > with a tridiagonal matrix A using the LU factorization computed */
+/* > by SGTTRF. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations. */
+/* >          = 'N':  A * X = B  (No transpose) */
+/* >          = 'T':  A**T* X = B  (Transpose) */
+/* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DL */
+/* > \verbatim */
+/* >          DL is REAL array, dimension (N-1) */
+/* >          The (n-1) multipliers that define the matrix L from the */
+/* >          LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          The n diagonal elements of the upper triangular matrix U from */
+/* >          the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU */
+/* > \verbatim */
+/* >          DU is REAL array, dimension (N-1) */
+/* >          The (n-1) elements of the first super-diagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU2 */
+/* > \verbatim */
+/* >          DU2 is REAL array, dimension (N-2) */
+/* >          The (n-2) elements of the second super-diagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
+/* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
+/* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
+/* >          required. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the matrix of right hand side vectors B. */
+/* >          On exit, B is overwritten by the solution vectors X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGTcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgttrs_(char *trans, integer *n, integer *nrhs, real *dl,
+	 real *d__, real *du, real *du2, integer *ipiv, real *b, integer *ldb,
+	 integer *info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer j, jb, nb;
+    extern /* Subroutine */ int sgtts2_(integer *, integer *, integer *, real 
+	    *, real *, real *, real *, integer *, real *, integer *), xerbla_(
+	    char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer itrans;
+    logical notran;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    --dl;
+    --d__;
+    --du;
+    --du2;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n';
+    if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *)
+	    trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned 
+	    char *)trans == 'c')) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*ldb < f2cmax(*n,1)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGTTRS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	return 0;
+    }
+
+/*     Decode TRANS */
+
+    if (notran) {
+	itrans = 0;
+    } else {
+	itrans = 1;
+    }
+
+/*     Determine the number of right-hand sides to solve at a time. */
+
+    if (*nrhs == 1) {
+	nb = 1;
+    } else {
+/* Computing MAX */
+	i__1 = 1, i__2 = ilaenv_(&c__1, "SGTTRS", trans, n, nrhs, &c_n1, &
+		c_n1, (ftnlen)6, (ftnlen)1);
+	nb = f2cmax(i__1,i__2);
+    }
+
+    if (nb >= *nrhs) {
+	sgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], 
+		&b[b_offset], ldb);
+    } else {
+	i__1 = *nrhs;
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = *nrhs - j + 1;
+	    jb = f2cmin(i__3,nb);
+	    sgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[
+		    1], &b[j * b_dim1 + 1], ldb);
+/* L10: */
+	}
+    }
+
+/*     End of SGTTRS */
+
+    return 0;
+} /* sgttrs_ */
+
diff --git a/lapack-netlib/SRC/sgtts2.c b/lapack-netlib/SRC/sgtts2.c
new file mode 100644
index 000000000..923eec1f8
--- /dev/null
+++ b/lapack-netlib/SRC/sgtts2.c
@@ -0,0 +1,706 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization
+ computed by sgttrf. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SGTTS2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgtts2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgtts2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgtts2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) */
+
+/*       INTEGER            ITRANS, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SGTTS2 solves one of the systems of equations */
+/* >    A*X = B  or  A**T*X = B, */
+/* > with a tridiagonal matrix A using the LU factorization computed */
+/* > by SGTTRF. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] ITRANS */
+/* > \verbatim */
+/* >          ITRANS is INTEGER */
+/* >          Specifies the form of the system of equations. */
+/* >          = 0:  A * X = B  (No transpose) */
+/* >          = 1:  A**T* X = B  (Transpose) */
+/* >          = 2:  A**T* X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DL */
+/* > \verbatim */
+/* >          DL is REAL array, dimension (N-1) */
+/* >          The (n-1) multipliers that define the matrix L from the */
+/* >          LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] D */
+/* > \verbatim */
+/* >          D is REAL array, dimension (N) */
+/* >          The n diagonal elements of the upper triangular matrix U from */
+/* >          the LU factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU */
+/* > \verbatim */
+/* >          DU is REAL array, dimension (N-1) */
+/* >          The (n-1) elements of the first super-diagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DU2 */
+/* > \verbatim */
+/* >          DU2 is REAL array, dimension (N-2) */
+/* >          The (n-2) elements of the second super-diagonal of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= n, row i of the matrix was */
+/* >          interchanged with row IPIV(i).  IPIV(i) will always be either */
+/* >          i or i+1; IPIV(i) = i indicates a row interchange was not */
+/* >          required. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >          On entry, the matrix of right hand side vectors B. */
+/* >          On exit, B is overwritten by the solution vectors X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGTcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sgtts2_(integer *itrans, integer *n, integer *nrhs, real 
+	*dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer *
+	ldb)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    real temp;
+    integer i__, j, ip;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Quick return if possible */
+
+    /* Parameter adjustments */
+    --dl;
+    --d__;
+    --du;
+    --du2;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (*n == 0 || *nrhs == 0) {
+	return 0;
+    }
+
+    if (*itrans == 0) {
+
+/*        Solve A*X = B using the LU factorization of A, */
+/*        overwriting each right hand side vector with its solution. */
+
+	if (*nrhs <= 1) {
+	    j = 1;
+L10:
+
+/*           Solve L*x = b. */
+
+	    i__1 = *n - 1;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		ip = ipiv[i__];
+		temp = b[i__ + 1 - ip + i__ + j * b_dim1] - dl[i__] * b[ip + 
+			j * b_dim1];
+		b[i__ + j * b_dim1] = b[ip + j * b_dim1];
+		b[i__ + 1 + j * b_dim1] = temp;
+/* L20: */
+	    }
+
+/*           Solve U*x = b. */
+
+	    b[*n + j * b_dim1] /= d__[*n];
+	    if (*n > 1) {
+		b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] 
+			* b[*n + j * b_dim1]) / d__[*n - 1];
+	    }
+	    for (i__ = *n - 2; i__ >= 1; --i__) {
+		b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ 
+			+ 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j * b_dim1]
+			) / d__[i__];
+/* L30: */
+	    }
+	    if (j < *nrhs) {
+		++j;
+		goto L10;
+	    }
+	} else {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+
+/*              Solve L*x = b. */
+
+		i__2 = *n - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if (ipiv[i__] == i__) {
+			b[i__ + 1 + j * b_dim1] -= dl[i__] * b[i__ + j * 
+				b_dim1];
+		    } else {
+			temp = b[i__ + j * b_dim1];
+			b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1];
+			b[i__ + 1 + j * b_dim1] = temp - dl[i__] * b[i__ + j *
+				 b_dim1];
+		    }
+/* L40: */
+		}
+
+/*              Solve U*x = b. */
+
+		b[*n + j * b_dim1] /= d__[*n];
+		if (*n > 1) {
+		    b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n 
+			    - 1] * b[*n + j * b_dim1]) / d__[*n - 1];
+		}
+		for (i__ = *n - 2; i__ >= 1; --i__) {
+		    b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[
+			    i__ + 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j *
+			     b_dim1]) / d__[i__];
+/* L50: */
+		}
+/* L60: */
+	    }
+	}
+    } else {
+
+/*        Solve A**T * X = B. */
+
+	if (*nrhs <= 1) {
+
+/*           Solve U**T*x = b. */
+
+	    j = 1;
+L70:
+	    b[j * b_dim1 + 1] /= d__[1];
+	    if (*n > 1) {
+		b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * b_dim1 
+			+ 1]) / d__[2];
+	    }
+	    i__1 = *n;
+	    for (i__ = 3; i__ <= i__1; ++i__) {
+		b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] * b[
+			i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - 2 + j *
+			 b_dim1]) / d__[i__];
+/* L80: */
+	    }
+
+/*           Solve L**T*x = b. */
+
+	    for (i__ = *n - 1; i__ >= 1; --i__) {
+		ip = ipiv[i__];
+		temp = b[i__ + j * b_dim1] - dl[i__] * b[i__ + 1 + j * b_dim1]
+			;
+		b[i__ + j * b_dim1] = b[ip + j * b_dim1];
+		b[ip + j * b_dim1] = temp;
+/* L90: */
+	    }
+	    if (j < *nrhs) {
+		++j;
+		goto L70;
+	    }
+
+	} else {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+
+/*              Solve U**T*x = b. */
+
+		b[j * b_dim1 + 1] /= d__[1];
+		if (*n > 1) {
+		    b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * 
+			    b_dim1 + 1]) / d__[2];
+		}
+		i__2 = *n;
+		for (i__ = 3; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] *
+			     b[i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - 
+			    2 + j * b_dim1]) / d__[i__];
+/* L100: */
+		}
+		for (i__ = *n - 1; i__ >= 1; --i__) {
+		    if (ipiv[i__] == i__) {
+			b[i__ + j * b_dim1] -= dl[i__] * b[i__ + 1 + j * 
+				b_dim1];
+		    } else {
+			temp = b[i__ + 1 + j * b_dim1];
+			b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - dl[
+				i__] * temp;
+			b[i__ + j * b_dim1] = temp;
+		    }
+/* L110: */
+		}
+/* L120: */
+	    }
+	}
+    }
+
+/*     End of SGTTS2 */
+
+    return 0;
+} /* sgtts2_ */
+
diff --git a/lapack-netlib/SRC/shgeqz.c b/lapack-netlib/SRC/shgeqz.c
new file mode 100644
index 000000000..7301c0e1a
--- /dev/null
+++ b/lapack-netlib/SRC/shgeqz.c
@@ -0,0 +1,1976 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static real c_b12 = 0.f;
+static real c_b13 = 1.f;
+static integer c__1 = 1;
+static integer c__3 = 3;
+
+/* > \brief \b SHGEQZ */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SHGEQZ + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/shgeqz.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/shgeqz.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/shgeqz.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, */
+/*                          ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, */
+/*                          LWORK, INFO ) */
+
+/*       CHARACTER          COMPQ, COMPZ, JOB */
+/*       INTEGER            IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N */
+/*       REAL               ALPHAI( * ), ALPHAR( * ), BETA( * ), */
+/*      $                   H( LDH, * ), Q( LDQ, * ), T( LDT, * ), */
+/*      $                   WORK( * ), Z( LDZ, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SHGEQZ computes the eigenvalues of a real matrix pair (H,T), */
+/* > where H is an upper Hessenberg matrix and T is upper triangular, */
+/* > using the double-shift QZ method. */
+/* > Matrix pairs of this type are produced by the reduction to */
+/* > generalized upper Hessenberg form of a real matrix pair (A,B): */
+/* > */
+/* >    A = Q1*H*Z1**T,  B = Q1*T*Z1**T, */
+/* > */
+/* > as computed by SGGHRD. */
+/* > */
+/* > If JOB='S', then the Hessenberg-triangular pair (H,T) is */
+/* > also reduced to generalized Schur form, */
+/* > */
+/* >    H = Q*S*Z**T,  T = Q*P*Z**T, */
+/* > */
+/* > where Q and Z are orthogonal matrices, P is an upper triangular */
+/* > matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 */
+/* > diagonal blocks. */
+/* > */
+/* > The 1-by-1 blocks correspond to real eigenvalues of the matrix pair */
+/* > (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of */
+/* > eigenvalues. */
+/* > */
+/* > Additionally, the 2-by-2 upper triangular diagonal blocks of P */
+/* > corresponding to 2-by-2 blocks of S are reduced to positive diagonal */
+/* > form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, */
+/* > P(j,j) > 0, and P(j+1,j+1) > 0. */
+/* > */
+/* > Optionally, the orthogonal matrix Q from the generalized Schur */
+/* > factorization may be postmultiplied into an input matrix Q1, and the */
+/* > orthogonal matrix Z may be postmultiplied into an input matrix Z1. */
+/* > If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced */
+/* > the matrix pair (A,B) to generalized upper Hessenberg form, then the */
+/* > output matrices Q1*Q and Z1*Z are the orthogonal factors from the */
+/* > generalized Schur factorization of (A,B): */
+/* > */
+/* >    A = (Q1*Q)*S*(Z1*Z)**T,  B = (Q1*Q)*P*(Z1*Z)**T. */
+/* > */
+/* > To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, */
+/* > of (A,B)) are computed as a pair of values (alpha,beta), where alpha is */
+/* > complex and beta real. */
+/* > If beta is nonzero, lambda = alpha / beta is an eigenvalue of the */
+/* > generalized nonsymmetric eigenvalue problem (GNEP) */
+/* >    A*x = lambda*B*x */
+/* > and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the */
+/* > alternate form of the GNEP */
+/* >    mu*A*y = B*y. */
+/* > Real eigenvalues can be read directly from the generalized Schur */
+/* > form: */
+/* >   alpha = S(i,i), beta = P(i,i). */
+/* > */
+/* > Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix */
+/* >      Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), */
+/* >      pp. 241--256. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          = 'E': Compute eigenvalues only; */
+/* >          = 'S': Compute eigenvalues and the Schur form. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COMPQ */
+/* > \verbatim */
+/* >          COMPQ is CHARACTER*1 */
+/* >          = 'N': Left Schur vectors (Q) are not computed; */
+/* >          = 'I': Q is initialized to the unit matrix and the matrix Q */
+/* >                 of left Schur vectors of (H,T) is returned; */
+/* >          = 'V': Q must contain an orthogonal matrix Q1 on entry and */
+/* >                 the product Q1*Q is returned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COMPZ */
+/* > \verbatim */
+/* >          COMPZ is CHARACTER*1 */
+/* >          = 'N': Right Schur vectors (Z) are not computed; */
+/* >          = 'I': Z is initialized to the unit matrix and the matrix Z */
+/* >                 of right Schur vectors of (H,T) is returned; */
+/* >          = 'V': Z must contain an orthogonal matrix Z1 on entry and */
+/* >                 the product Z1*Z is returned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrices H, T, Q, and Z.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          ILO and IHI mark the rows and columns of H which are in */
+/* >          Hessenberg form.  It is assumed that A is already upper */
+/* >          triangular in rows and columns 1:ILO-1 and IHI+1:N. */
+/* >          If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] H */
+/* > \verbatim */
+/* >          H is REAL array, dimension (LDH, N) */
+/* >          On entry, the N-by-N upper Hessenberg matrix H. */
+/* >          On exit, if JOB = 'S', H contains the upper quasi-triangular */
+/* >          matrix S from the generalized Schur factorization. */
+/* >          If JOB = 'E', the diagonal blocks of H match those of S, but */
+/* >          the rest of H is unspecified. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDH */
+/* > \verbatim */
+/* >          LDH is INTEGER */
+/* >          The leading dimension of the array H.  LDH >= f2cmax( 1, N ). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] T */
+/* > \verbatim */
+/* >          T is REAL array, dimension (LDT, N) */
+/* >          On entry, the N-by-N upper triangular matrix T. */
+/* >          On exit, if JOB = 'S', T contains the upper triangular */
+/* >          matrix P from the generalized Schur factorization; */
+/* >          2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks of S */
+/* >          are reduced to positive diagonal form, i.e., if H(j+1,j) is */
+/* >          non-zero, then T(j+1,j) = T(j,j+1) = 0, T(j,j) > 0, and */
+/* >          T(j+1,j+1) > 0. */
+/* >          If JOB = 'E', the diagonal blocks of T match those of P, but */
+/* >          the rest of T is unspecified. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= f2cmax( 1, N ). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAR */
+/* > \verbatim */
+/* >          ALPHAR is REAL array, dimension (N) */
+/* >          The real parts of each scalar alpha defining an eigenvalue */
+/* >          of GNEP. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ALPHAI */
+/* > \verbatim */
+/* >          ALPHAI is REAL array, dimension (N) */
+/* >          The imaginary parts of each scalar alpha defining an */
+/* >          eigenvalue of GNEP. */
+/* >          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */
+/* >          positive, then the j-th and (j+1)-st eigenvalues are a */
+/* >          complex conjugate pair, with ALPHAI(j+1) = -ALPHAI(j). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BETA */
+/* > \verbatim */
+/* >          BETA is REAL array, dimension (N) */
+/* >          The scalars beta that define the eigenvalues of GNEP. */
+/* >          Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */
+/* >          beta = BETA(j) represent the j-th eigenvalue of the matrix */
+/* >          pair (A,B), in one of the forms lambda = alpha/beta or */
+/* >          mu = beta/alpha.  Since either lambda or mu may overflow, */
+/* >          they should not, in general, be computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Q */
+/* > \verbatim */
+/* >          Q is REAL array, dimension (LDQ, N) */
+/* >          On entry, if COMPQ = 'V', the orthogonal matrix Q1 used in */
+/* >          the reduction of (A,B) to generalized Hessenberg form. */
+/* >          On exit, if COMPQ = 'I', the orthogonal matrix of left Schur */
+/* >          vectors of (H,T), and if COMPQ = 'V', the orthogonal matrix */
+/* >          of left Schur vectors of (A,B). */
+/* >          Not referenced if COMPQ = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDQ */
+/* > \verbatim */
+/* >          LDQ is INTEGER */
+/* >          The leading dimension of the array Q.  LDQ >= 1. */
+/* >          If COMPQ='V' or 'I', then LDQ >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Z */
+/* > \verbatim */
+/* >          Z is REAL array, dimension (LDZ, N) */
+/* >          On entry, if COMPZ = 'V', the orthogonal matrix Z1 used in */
+/* >          the reduction of (A,B) to generalized Hessenberg form. */
+/* >          On exit, if COMPZ = 'I', the orthogonal matrix of */
+/* >          right Schur vectors of (H,T), and if COMPZ = 'V', the */
+/* >          orthogonal matrix of right Schur vectors of (A,B). */
+/* >          Not referenced if COMPZ = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDZ */
+/* > \verbatim */
+/* >          LDZ is INTEGER */
+/* >          The leading dimension of the array Z.  LDZ >= 1. */
+/* >          If COMPZ='V' or 'I', then LDZ >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* >          = 1,...,N: the QZ iteration did not converge.  (H,T) is not */
+/* >                     in Schur form, but ALPHAR(i), ALPHAI(i), and */
+/* >                     BETA(i), i=INFO+1,...,N should be correct. */
+/* >          = N+1,...,2*N: the shift calculation failed.  (H,T) is not */
+/* >                     in Schur form, but ALPHAR(i), ALPHAI(i), and */
+/* >                     BETA(i), i=INFO-N+1,...,N should be correct. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  Iteration counters: */
+/* > */
+/* >  JITER  -- counts iterations. */
+/* >  IITER  -- counts iterations run since ILAST was last */
+/* >            changed.  This is therefore reset only when a 1-by-1 or */
+/* >            2-by-2 block deflates off the bottom. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int shgeqz_(char *job, char *compq, char *compz, integer *n, 
+	integer *ilo, integer *ihi, real *h__, integer *ldh, real *t, integer 
+	*ldt, real *alphar, real *alphai, real *beta, real *q, integer *ldq, 
+	real *z__, integer *ldz, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer h_dim1, h_offset, q_dim1, q_offset, t_dim1, t_offset, z_dim1, 
+	    z_offset, i__1, i__2, i__3, i__4;
+    real r__1, r__2, r__3, r__4;
+
+    /* Local variables */
+    real ad11l, ad12l, ad21l, ad22l, ad32l, wabs, atol, btol, temp;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
+	    integer *, real *, real *), slag2_(real *, integer *, real *, 
+	    integer *, real *, real *, real *, real *, real *, real *);
+    real temp2, s1inv, c__;
+    integer j;
+    real s, v[3], scale;
+    extern logical lsame_(char *, char *);
+    integer iiter, ilast, jiter;
+    real anorm, bnorm;
+    integer maxit;
+    real tempi, tempr, s1, s2, t1, u1, u2;
+    logical ilazr2;
+    real a11, a12, a21, a22, b11, b22, c12, c21;
+    extern real slapy2_(real *, real *);
+    integer jc;
+    extern real slapy3_(real *, real *, real *);
+    real an, bn, cl;
+    extern /* Subroutine */ int slasv2_(real *, real *, real *, real *, real *
+	    , real *, real *, real *, real *);
+    real cq, cr;
+    integer in;
+    real ascale, bscale, u12, w11;
+    integer jr;
+    real cz, sl, w12, w21, w22, wi, sr;
+    extern real slamch_(char *);
+    real vs, wr, safmin;
+    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, 
+	    real *);
+    real safmax;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real eshift;
+    logical ilschr;
+    real b1a, b2a;
+    integer icompq, ilastm;
+    extern real slanhs_(char *, integer *, real *, integer *, real *);
+    real a1i;
+    integer ischur;
+    real a2i, b1i;
+    logical ilazro;
+    integer icompz, ifirst, ifrstm;
+    real a1r;
+    integer istart;
+    logical ilpivt;
+    real a2r, b1r, b2i, b2r;
+    extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
+	    ), slaset_(char *, integer *, integer *, real *, real *, real *, 
+	    integer *);
+    logical lquery;
+    real wr2, ad11, ad12, ad21, ad22, c11i, c22i;
+    integer jch;
+    real c11r, c22r;
+    logical ilq;
+    real u12l, tau, sqi;
+    logical ilz;
+    real ulp, sqr, szi, szr;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+/*    $                     SAFETY = 1.0E+0 ) */
+
+/*     Decode JOB, COMPQ, COMPZ */
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1 * 1;
+    h__ -= h_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    --alphar;
+    --alphai;
+    --beta;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1 * 1;
+    q -= q_offset;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(job, "E")) {
+	ilschr = FALSE_;
+	ischur = 1;
+    } else if (lsame_(job, "S")) {
+	ilschr = TRUE_;
+	ischur = 2;
+    } else {
+	ischur = 0;
+    }
+
+    if (lsame_(compq, "N")) {
+	ilq = FALSE_;
+	icompq = 1;
+    } else if (lsame_(compq, "V")) {
+	ilq = TRUE_;
+	icompq = 2;
+    } else if (lsame_(compq, "I")) {
+	ilq = TRUE_;
+	icompq = 3;
+    } else {
+	icompq = 0;
+    }
+
+    if (lsame_(compz, "N")) {
+	ilz = FALSE_;
+	icompz = 1;
+    } else if (lsame_(compz, "V")) {
+	ilz = TRUE_;
+	icompz = 2;
+    } else if (lsame_(compz, "I")) {
+	ilz = TRUE_;
+	icompz = 3;
+    } else {
+	icompz = 0;
+    }
+
+/*     Check Argument Values */
+
+    *info = 0;
+    work[1] = (real) f2cmax(1,*n);
+    lquery = *lwork == -1;
+    if (ischur == 0) {
+	*info = -1;
+    } else if (icompq == 0) {
+	*info = -2;
+    } else if (icompz == 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*ilo < 1) {
+	*info = -5;
+    } else if (*ihi > *n || *ihi < *ilo - 1) {
+	*info = -6;
+    } else if (*ldh < *n) {
+	*info = -8;
+    } else if (*ldt < *n) {
+	*info = -10;
+    } else if (*ldq < 1 || ilq && *ldq < *n) {
+	*info = -15;
+    } else if (*ldz < 1 || ilz && *ldz < *n) {
+	*info = -17;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -19;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SHGEQZ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+/*     Initialize Q and Z */
+
+    if (icompq == 3) {
+	slaset_("Full", n, n, &c_b12, &c_b13, &q[q_offset], ldq);
+    }
+    if (icompz == 3) {
+	slaset_("Full", n, n, &c_b12, &c_b13, &z__[z_offset], ldz);
+    }
+
+/*     Machine Constants */
+
+    in = *ihi + 1 - *ilo;
+    safmin = slamch_("S");
+    safmax = 1.f / safmin;
+    ulp = slamch_("E") * slamch_("B");
+    anorm = slanhs_("F", &in, &h__[*ilo + *ilo * h_dim1], ldh, &work[1]);
+    bnorm = slanhs_("F", &in, &t[*ilo + *ilo * t_dim1], ldt, &work[1]);
+/* Computing MAX */
+    r__1 = safmin, r__2 = ulp * anorm;
+    atol = f2cmax(r__1,r__2);
+/* Computing MAX */
+    r__1 = safmin, r__2 = ulp * bnorm;
+    btol = f2cmax(r__1,r__2);
+    ascale = 1.f / f2cmax(safmin,anorm);
+    bscale = 1.f / f2cmax(safmin,bnorm);
+
+/*     Set Eigenvalues IHI+1:N */
+
+    i__1 = *n;
+    for (j = *ihi + 1; j <= i__1; ++j) {
+	if (t[j + j * t_dim1] < 0.f) {
+	    if (ilschr) {
+		i__2 = j;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    h__[jr + j * h_dim1] = -h__[jr + j * h_dim1];
+		    t[jr + j * t_dim1] = -t[jr + j * t_dim1];
+/* L10: */
+		}
+	    } else {
+		h__[j + j * h_dim1] = -h__[j + j * h_dim1];
+		t[j + j * t_dim1] = -t[j + j * t_dim1];
+	    }
+	    if (ilz) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    z__[jr + j * z_dim1] = -z__[jr + j * z_dim1];
+/* L20: */
+		}
+	    }
+	}
+	alphar[j] = h__[j + j * h_dim1];
+	alphai[j] = 0.f;
+	beta[j] = t[j + j * t_dim1];
+/* L30: */
+    }
+
+/*     If IHI < ILO, skip QZ steps */
+
+    if (*ihi < *ilo) {
+	goto L380;
+    }
+
+/*     MAIN QZ ITERATION LOOP */
+
+/*     Initialize dynamic indices */
+
+/*     Eigenvalues ILAST+1:N have been found. */
+/*        Column operations modify rows IFRSTM:whatever. */
+/*        Row operations modify columns whatever:ILASTM. */
+
+/*     If only eigenvalues are being computed, then */
+/*        IFRSTM is the row of the last splitting row above row ILAST; */
+/*        this is always at least ILO. */
+/*     IITER counts iterations since the last eigenvalue was found, */
+/*        to tell when to use an extraordinary shift. */
+/*     MAXIT is the maximum number of QZ sweeps allowed. */
+
+    ilast = *ihi;
+    if (ilschr) {
+	ifrstm = 1;
+	ilastm = *n;
+    } else {
+	ifrstm = *ilo;
+	ilastm = *ihi;
+    }
+    iiter = 0;
+    eshift = 0.f;
+    maxit = (*ihi - *ilo + 1) * 30;
+
+    i__1 = maxit;
+    for (jiter = 1; jiter <= i__1; ++jiter) {
+
+/*        Split the matrix if possible. */
+
+/*        Two tests: */
+/*           1: H(j,j-1)=0  or  j=ILO */
+/*           2: T(j,j)=0 */
+
+	if (ilast == *ilo) {
+
+/*           Special case: j=ILAST */
+
+	    goto L80;
+	} else {
+	    if ((r__1 = h__[ilast + (ilast - 1) * h_dim1], abs(r__1)) <= atol)
+		     {
+		h__[ilast + (ilast - 1) * h_dim1] = 0.f;
+		goto L80;
+	    }
+	}
+
+	if ((r__1 = t[ilast + ilast * t_dim1], abs(r__1)) <= btol) {
+	    t[ilast + ilast * t_dim1] = 0.f;
+	    goto L70;
+	}
+
+/*        General case: j<ILAST */
+
+	i__2 = *ilo;
+	for (j = ilast - 1; j >= i__2; --j) {
+
+/*           Test 1: for H(j,j-1)=0 or j=ILO */
+
+	    if (j == *ilo) {
+		ilazro = TRUE_;
+	    } else {
+		if ((r__1 = h__[j + (j - 1) * h_dim1], abs(r__1)) <= atol) {
+		    h__[j + (j - 1) * h_dim1] = 0.f;
+		    ilazro = TRUE_;
+		} else {
+		    ilazro = FALSE_;
+		}
+	    }
+
+/*           Test 2: for T(j,j)=0 */
+
+	    if ((r__1 = t[j + j * t_dim1], abs(r__1)) < btol) {
+		t[j + j * t_dim1] = 0.f;
+
+/*              Test 1a: Check for 2 consecutive small subdiagonals in A */
+
+		ilazr2 = FALSE_;
+		if (! ilazro) {
+		    temp = (r__1 = h__[j + (j - 1) * h_dim1], abs(r__1));
+		    temp2 = (r__1 = h__[j + j * h_dim1], abs(r__1));
+		    tempr = f2cmax(temp,temp2);
+		    if (tempr < 1.f && tempr != 0.f) {
+			temp /= tempr;
+			temp2 /= tempr;
+		    }
+		    if (temp * (ascale * (r__1 = h__[j + 1 + j * h_dim1], abs(
+			    r__1))) <= temp2 * (ascale * atol)) {
+			ilazr2 = TRUE_;
+		    }
+		}
+
+/*              If both tests pass (1 & 2), i.e., the leading diagonal */
+/*              element of B in the block is zero, split a 1x1 block off */
+/*              at the top. (I.e., at the J-th row/column) The leading */
+/*              diagonal element of the remainder can also be zero, so */
+/*              this may have to be done repeatedly. */
+
+		if (ilazro || ilazr2) {
+		    i__3 = ilast - 1;
+		    for (jch = j; jch <= i__3; ++jch) {
+			temp = h__[jch + jch * h_dim1];
+			slartg_(&temp, &h__[jch + 1 + jch * h_dim1], &c__, &s,
+				 &h__[jch + jch * h_dim1]);
+			h__[jch + 1 + jch * h_dim1] = 0.f;
+			i__4 = ilastm - jch;
+			srot_(&i__4, &h__[jch + (jch + 1) * h_dim1], ldh, &
+				h__[jch + 1 + (jch + 1) * h_dim1], ldh, &c__, 
+				&s);
+			i__4 = ilastm - jch;
+			srot_(&i__4, &t[jch + (jch + 1) * t_dim1], ldt, &t[
+				jch + 1 + (jch + 1) * t_dim1], ldt, &c__, &s);
+			if (ilq) {
+			    srot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1)
+				     * q_dim1 + 1], &c__1, &c__, &s);
+			}
+			if (ilazr2) {
+			    h__[jch + (jch - 1) * h_dim1] *= c__;
+			}
+			ilazr2 = FALSE_;
+			if ((r__1 = t[jch + 1 + (jch + 1) * t_dim1], abs(r__1)
+				) >= btol) {
+			    if (jch + 1 >= ilast) {
+				goto L80;
+			    } else {
+				ifirst = jch + 1;
+				goto L110;
+			    }
+			}
+			t[jch + 1 + (jch + 1) * t_dim1] = 0.f;
+/* L40: */
+		    }
+		    goto L70;
+		} else {
+
+/*                 Only test 2 passed -- chase the zero to T(ILAST,ILAST) */
+/*                 Then process as in the case T(ILAST,ILAST)=0 */
+
+		    i__3 = ilast - 1;
+		    for (jch = j; jch <= i__3; ++jch) {
+			temp = t[jch + (jch + 1) * t_dim1];
+			slartg_(&temp, &t[jch + 1 + (jch + 1) * t_dim1], &c__,
+				 &s, &t[jch + (jch + 1) * t_dim1]);
+			t[jch + 1 + (jch + 1) * t_dim1] = 0.f;
+			if (jch < ilastm - 1) {
+			    i__4 = ilastm - jch - 1;
+			    srot_(&i__4, &t[jch + (jch + 2) * t_dim1], ldt, &
+				    t[jch + 1 + (jch + 2) * t_dim1], ldt, &
+				    c__, &s);
+			}
+			i__4 = ilastm - jch + 2;
+			srot_(&i__4, &h__[jch + (jch - 1) * h_dim1], ldh, &
+				h__[jch + 1 + (jch - 1) * h_dim1], ldh, &c__, 
+				&s);
+			if (ilq) {
+			    srot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1)
+				     * q_dim1 + 1], &c__1, &c__, &s);
+			}
+			temp = h__[jch + 1 + jch * h_dim1];
+			slartg_(&temp, &h__[jch + 1 + (jch - 1) * h_dim1], &
+				c__, &s, &h__[jch + 1 + jch * h_dim1]);
+			h__[jch + 1 + (jch - 1) * h_dim1] = 0.f;
+			i__4 = jch + 1 - ifrstm;
+			srot_(&i__4, &h__[ifrstm + jch * h_dim1], &c__1, &h__[
+				ifrstm + (jch - 1) * h_dim1], &c__1, &c__, &s)
+				;
+			i__4 = jch - ifrstm;
+			srot_(&i__4, &t[ifrstm + jch * t_dim1], &c__1, &t[
+				ifrstm + (jch - 1) * t_dim1], &c__1, &c__, &s)
+				;
+			if (ilz) {
+			    srot_(n, &z__[jch * z_dim1 + 1], &c__1, &z__[(jch 
+				    - 1) * z_dim1 + 1], &c__1, &c__, &s);
+			}
+/* L50: */
+		    }
+		    goto L70;
+		}
+	    } else if (ilazro) {
+
+/*              Only test 1 passed -- work on J:ILAST */
+
+		ifirst = j;
+		goto L110;
+	    }
+
+/*           Neither test passed -- try next J */
+
+/* L60: */
+	}
+
+/*        (Drop-through is "impossible") */
+
+	*info = *n + 1;
+	goto L420;
+
+/*        T(ILAST,ILAST)=0 -- clear H(ILAST,ILAST-1) to split off a */
+/*        1x1 block. */
+
+L70:
+	temp = h__[ilast + ilast * h_dim1];
+	slartg_(&temp, &h__[ilast + (ilast - 1) * h_dim1], &c__, &s, &h__[
+		ilast + ilast * h_dim1]);
+	h__[ilast + (ilast - 1) * h_dim1] = 0.f;
+	i__2 = ilast - ifrstm;
+	srot_(&i__2, &h__[ifrstm + ilast * h_dim1], &c__1, &h__[ifrstm + (
+		ilast - 1) * h_dim1], &c__1, &c__, &s);
+	i__2 = ilast - ifrstm;
+	srot_(&i__2, &t[ifrstm + ilast * t_dim1], &c__1, &t[ifrstm + (ilast - 
+		1) * t_dim1], &c__1, &c__, &s);
+	if (ilz) {
+	    srot_(n, &z__[ilast * z_dim1 + 1], &c__1, &z__[(ilast - 1) * 
+		    z_dim1 + 1], &c__1, &c__, &s);
+	}
+
+/*        H(ILAST,ILAST-1)=0 -- Standardize B, set ALPHAR, ALPHAI, */
+/*                              and BETA */
+
+L80:
+	if (t[ilast + ilast * t_dim1] < 0.f) {
+	    if (ilschr) {
+		i__2 = ilast;
+		for (j = ifrstm; j <= i__2; ++j) {
+		    h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1];
+		    t[j + ilast * t_dim1] = -t[j + ilast * t_dim1];
+/* L90: */
+		}
+	    } else {
+		h__[ilast + ilast * h_dim1] = -h__[ilast + ilast * h_dim1];
+		t[ilast + ilast * t_dim1] = -t[ilast + ilast * t_dim1];
+	    }
+	    if (ilz) {
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1];
+/* L100: */
+		}
+	    }
+	}
+	alphar[ilast] = h__[ilast + ilast * h_dim1];
+	alphai[ilast] = 0.f;
+	beta[ilast] = t[ilast + ilast * t_dim1];
+
+/*        Go to next block -- exit if finished. */
+
+	--ilast;
+	if (ilast < *ilo) {
+	    goto L380;
+	}
+
+/*        Reset counters */
+
+	iiter = 0;
+	eshift = 0.f;
+	if (! ilschr) {
+	    ilastm = ilast;
+	    if (ifrstm > ilast) {
+		ifrstm = *ilo;
+	    }
+	}
+	goto L350;
+
+/*        QZ step */
+
+/*        This iteration only involves rows/columns IFIRST:ILAST. We */
+/*        assume IFIRST < ILAST, and that the diagonal of B is non-zero. */
+
+L110:
+	++iiter;
+	if (! ilschr) {
+	    ifrstm = ifirst;
+	}
+
+/*        Compute single shifts. */
+
+/*        At this point, IFIRST < ILAST, and the diagonal elements of */
+/*        T(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in */
+/*        magnitude) */
+
+	if (iiter / 10 * 10 == iiter) {
+
+/*           Exceptional shift.  Chosen for no particularly good reason. */
+/*           (Single shift only.) */
+
+	    if ((real) maxit * safmin * (r__1 = h__[ilast + (ilast - 1) * 
+		    h_dim1], abs(r__1)) < (r__2 = t[ilast - 1 + (ilast - 1) * 
+		    t_dim1], abs(r__2))) {
+		eshift = h__[ilast + (ilast - 1) * h_dim1] / t[ilast - 1 + (
+			ilast - 1) * t_dim1];
+	    } else {
+		eshift += 1.f / (safmin * (real) maxit);
+	    }
+	    s1 = 1.f;
+	    wr = eshift;
+
+	} else {
+
+/*           Shifts based on the generalized eigenvalues of the */
+/*           bottom-right 2x2 block of A and B. The first eigenvalue */
+/*           returned by SLAG2 is the Wilkinson shift (AEP p.512), */
+
+	    r__1 = safmin * 100.f;
+	    slag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 
+		    + (ilast - 1) * t_dim1], ldt, &r__1, &s1, &s2, &wr, &wr2, 
+		    &wi);
+
+	    if ((r__1 = wr / s1 * t[ilast + ilast * t_dim1] - h__[ilast + 
+		    ilast * h_dim1], abs(r__1)) > (r__2 = wr2 / s2 * t[ilast 
+		    + ilast * t_dim1] - h__[ilast + ilast * h_dim1], abs(r__2)
+		    )) {
+		temp = wr;
+		wr = wr2;
+		wr2 = temp;
+		temp = s1;
+		s1 = s2;
+		s2 = temp;
+	    }
+/* Computing MAX */
+/* Computing MAX */
+	    r__3 = 1.f, r__4 = abs(wr), r__3 = f2cmax(r__3,r__4), r__4 = abs(wi);
+	    r__1 = s1, r__2 = safmin * f2cmax(r__3,r__4);
+	    temp = f2cmax(r__1,r__2);
+	    if (wi != 0.f) {
+		goto L200;
+	    }
+	}
+
+/*        Fiddle with shift to avoid overflow */
+
+	temp = f2cmin(ascale,1.f) * (safmax * .5f);
+	if (s1 > temp) {
+	    scale = temp / s1;
+	} else {
+	    scale = 1.f;
+	}
+
+	temp = f2cmin(bscale,1.f) * (safmax * .5f);
+	if (abs(wr) > temp) {
+/* Computing MIN */
+	    r__1 = scale, r__2 = temp / abs(wr);
+	    scale = f2cmin(r__1,r__2);
+	}
+	s1 = scale * s1;
+	wr = scale * wr;
+
+/*        Now check for two consecutive small subdiagonals. */
+
+	i__2 = ifirst + 1;
+	for (j = ilast - 1; j >= i__2; --j) {
+	    istart = j;
+	    temp = (r__1 = s1 * h__[j + (j - 1) * h_dim1], abs(r__1));
+	    temp2 = (r__1 = s1 * h__[j + j * h_dim1] - wr * t[j + j * t_dim1],
+		     abs(r__1));
+	    tempr = f2cmax(temp,temp2);
+	    if (tempr < 1.f && tempr != 0.f) {
+		temp /= tempr;
+		temp2 /= tempr;
+	    }
+	    if ((r__1 = ascale * h__[j + 1 + j * h_dim1] * temp, abs(r__1)) <=
+		     ascale * atol * temp2) {
+		goto L130;
+	    }
+/* L120: */
+	}
+
+	istart = ifirst;
+L130:
+
+/*        Do an implicit single-shift QZ sweep. */
+
+/*        Initial Q */
+
+	temp = s1 * h__[istart + istart * h_dim1] - wr * t[istart + istart * 
+		t_dim1];
+	temp2 = s1 * h__[istart + 1 + istart * h_dim1];
+	slartg_(&temp, &temp2, &c__, &s, &tempr);
+
+/*        Sweep */
+
+	i__2 = ilast - 1;
+	for (j = istart; j <= i__2; ++j) {
+	    if (j > istart) {
+		temp = h__[j + (j - 1) * h_dim1];
+		slartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[
+			j + (j - 1) * h_dim1]);
+		h__[j + 1 + (j - 1) * h_dim1] = 0.f;
+	    }
+
+	    i__3 = ilastm;
+	    for (jc = j; jc <= i__3; ++jc) {
+		temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * 
+			h_dim1];
+		h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * 
+			h__[j + 1 + jc * h_dim1];
+		h__[j + jc * h_dim1] = temp;
+		temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1];
+		t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j 
+			+ 1 + jc * t_dim1];
+		t[j + jc * t_dim1] = temp2;
+/* L140: */
+	    }
+	    if (ilq) {
+		i__3 = *n;
+		for (jr = 1; jr <= i__3; ++jr) {
+		    temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * 
+			    q_dim1];
+		    q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ *
+			     q[jr + (j + 1) * q_dim1];
+		    q[jr + j * q_dim1] = temp;
+/* L150: */
+		}
+	    }
+
+	    temp = t[j + 1 + (j + 1) * t_dim1];
+	    slartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + 
+		    1) * t_dim1]);
+	    t[j + 1 + j * t_dim1] = 0.f;
+
+/* Computing MIN */
+	    i__4 = j + 2;
+	    i__3 = f2cmin(i__4,ilast);
+	    for (jr = ifrstm; jr <= i__3; ++jr) {
+		temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * 
+			h_dim1];
+		h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ *
+			 h__[jr + j * h_dim1];
+		h__[jr + (j + 1) * h_dim1] = temp;
+/* L160: */
+	    }
+	    i__3 = j;
+	    for (jr = ifrstm; jr <= i__3; ++jr) {
+		temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1]
+			;
+		t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[
+			jr + j * t_dim1];
+		t[jr + (j + 1) * t_dim1] = temp;
+/* L170: */
+	    }
+	    if (ilz) {
+		i__3 = *n;
+		for (jr = 1; jr <= i__3; ++jr) {
+		    temp = c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j *
+			     z_dim1];
+		    z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + 
+			    c__ * z__[jr + j * z_dim1];
+		    z__[jr + (j + 1) * z_dim1] = temp;
+/* L180: */
+		}
+	    }
+/* L190: */
+	}
+
+	goto L350;
+
+/*        Use Francis double-shift */
+
+/*        Note: the Francis double-shift should work with real shifts, */
+/*              but only if the block is at least 3x3. */
+/*              This code may break if this point is reached with */
+/*              a 2x2 block with real eigenvalues. */
+
+L200:
+	if (ifirst + 1 == ilast) {
+
+/*           Special case -- 2x2 block with complex eigenvectors */
+
+/*           Step 1: Standardize, that is, rotate so that */
+
+/*                       ( B11  0  ) */
+/*                   B = (         )  with B11 non-negative. */
+/*                       (  0  B22 ) */
+
+	    slasv2_(&t[ilast - 1 + (ilast - 1) * t_dim1], &t[ilast - 1 + 
+		    ilast * t_dim1], &t[ilast + ilast * t_dim1], &b22, &b11, &
+		    sr, &cr, &sl, &cl);
+
+	    if (b11 < 0.f) {
+		cr = -cr;
+		sr = -sr;
+		b11 = -b11;
+		b22 = -b22;
+	    }
+
+	    i__2 = ilastm + 1 - ifirst;
+	    srot_(&i__2, &h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &h__[
+		    ilast + (ilast - 1) * h_dim1], ldh, &cl, &sl);
+	    i__2 = ilast + 1 - ifrstm;
+	    srot_(&i__2, &h__[ifrstm + (ilast - 1) * h_dim1], &c__1, &h__[
+		    ifrstm + ilast * h_dim1], &c__1, &cr, &sr);
+
+	    if (ilast < ilastm) {
+		i__2 = ilastm - ilast;
+		srot_(&i__2, &t[ilast - 1 + (ilast + 1) * t_dim1], ldt, &t[
+			ilast + (ilast + 1) * t_dim1], ldt, &cl, &sl);
+	    }
+	    if (ifrstm < ilast - 1) {
+		i__2 = ifirst - ifrstm;
+		srot_(&i__2, &t[ifrstm + (ilast - 1) * t_dim1], &c__1, &t[
+			ifrstm + ilast * t_dim1], &c__1, &cr, &sr);
+	    }
+
+	    if (ilq) {
+		srot_(n, &q[(ilast - 1) * q_dim1 + 1], &c__1, &q[ilast * 
+			q_dim1 + 1], &c__1, &cl, &sl);
+	    }
+	    if (ilz) {
+		srot_(n, &z__[(ilast - 1) * z_dim1 + 1], &c__1, &z__[ilast * 
+			z_dim1 + 1], &c__1, &cr, &sr);
+	    }
+
+	    t[ilast - 1 + (ilast - 1) * t_dim1] = b11;
+	    t[ilast - 1 + ilast * t_dim1] = 0.f;
+	    t[ilast + (ilast - 1) * t_dim1] = 0.f;
+	    t[ilast + ilast * t_dim1] = b22;
+
+/*           If B22 is negative, negate column ILAST */
+
+	    if (b22 < 0.f) {
+		i__2 = ilast;
+		for (j = ifrstm; j <= i__2; ++j) {
+		    h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1];
+		    t[j + ilast * t_dim1] = -t[j + ilast * t_dim1];
+/* L210: */
+		}
+
+		if (ilz) {
+		    i__2 = *n;
+		    for (j = 1; j <= i__2; ++j) {
+			z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1];
+/* L220: */
+		    }
+		}
+		b22 = -b22;
+	    }
+
+/*           Step 2: Compute ALPHAR, ALPHAI, and BETA (see refs.) */
+
+/*           Recompute shift */
+
+	    r__1 = safmin * 100.f;
+	    slag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 
+		    + (ilast - 1) * t_dim1], ldt, &r__1, &s1, &temp, &wr, &
+		    temp2, &wi);
+
+/*           If standardization has perturbed the shift onto real line, */
+/*           do another (real single-shift) QR step. */
+
+	    if (wi == 0.f) {
+		goto L350;
+	    }
+	    s1inv = 1.f / s1;
+
+/*           Do EISPACK (QZVAL) computation of alpha and beta */
+
+	    a11 = h__[ilast - 1 + (ilast - 1) * h_dim1];
+	    a21 = h__[ilast + (ilast - 1) * h_dim1];
+	    a12 = h__[ilast - 1 + ilast * h_dim1];
+	    a22 = h__[ilast + ilast * h_dim1];
+
+/*           Compute complex Givens rotation on right */
+/*           (Assume some element of C = (sA - wB) > unfl ) */
+/*                            __ */
+/*           (sA - wB) ( CZ   -SZ ) */
+/*                     ( SZ    CZ ) */
+
+	    c11r = s1 * a11 - wr * b11;
+	    c11i = -wi * b11;
+	    c12 = s1 * a12;
+	    c21 = s1 * a21;
+	    c22r = s1 * a22 - wr * b22;
+	    c22i = -wi * b22;
+
+	    if (abs(c11r) + abs(c11i) + abs(c12) > abs(c21) + abs(c22r) + abs(
+		    c22i)) {
+		t1 = slapy3_(&c12, &c11r, &c11i);
+		cz = c12 / t1;
+		szr = -c11r / t1;
+		szi = -c11i / t1;
+	    } else {
+		cz = slapy2_(&c22r, &c22i);
+		if (cz <= safmin) {
+		    cz = 0.f;
+		    szr = 1.f;
+		    szi = 0.f;
+		} else {
+		    tempr = c22r / cz;
+		    tempi = c22i / cz;
+		    t1 = slapy2_(&cz, &c21);
+		    cz /= t1;
+		    szr = -c21 * tempr / t1;
+		    szi = c21 * tempi / t1;
+		}
+	    }
+
+/*           Compute Givens rotation on left */
+
+/*           (  CQ   SQ ) */
+/*           (  __      )  A or B */
+/*           ( -SQ   CQ ) */
+
+	    an = abs(a11) + abs(a12) + abs(a21) + abs(a22);
+	    bn = abs(b11) + abs(b22);
+	    wabs = abs(wr) + abs(wi);
+	    if (s1 * an > wabs * bn) {
+		cq = cz * b11;
+		sqr = szr * b22;
+		sqi = -szi * b22;
+	    } else {
+		a1r = cz * a11 + szr * a12;
+		a1i = szi * a12;
+		a2r = cz * a21 + szr * a22;
+		a2i = szi * a22;
+		cq = slapy2_(&a1r, &a1i);
+		if (cq <= safmin) {
+		    cq = 0.f;
+		    sqr = 1.f;
+		    sqi = 0.f;
+		} else {
+		    tempr = a1r / cq;
+		    tempi = a1i / cq;
+		    sqr = tempr * a2r + tempi * a2i;
+		    sqi = tempi * a2r - tempr * a2i;
+		}
+	    }
+	    t1 = slapy3_(&cq, &sqr, &sqi);
+	    cq /= t1;
+	    sqr /= t1;
+	    sqi /= t1;
+
+/*           Compute diagonal elements of QBZ */
+
+	    tempr = sqr * szr - sqi * szi;
+	    tempi = sqr * szi + sqi * szr;
+	    b1r = cq * cz * b11 + tempr * b22;
+	    b1i = tempi * b22;
+	    b1a = slapy2_(&b1r, &b1i);
+	    b2r = cq * cz * b22 + tempr * b11;
+	    b2i = -tempi * b11;
+	    b2a = slapy2_(&b2r, &b2i);
+
+/*           Normalize so beta > 0, and Im( alpha1 ) > 0 */
+
+	    beta[ilast - 1] = b1a;
+	    beta[ilast] = b2a;
+	    alphar[ilast - 1] = wr * b1a * s1inv;
+	    alphai[ilast - 1] = wi * b1a * s1inv;
+	    alphar[ilast] = wr * b2a * s1inv;
+	    alphai[ilast] = -(wi * b2a) * s1inv;
+
+/*           Step 3: Go to next block -- exit if finished. */
+
+	    ilast = ifirst - 1;
+	    if (ilast < *ilo) {
+		goto L380;
+	    }
+
+/*           Reset counters */
+
+	    iiter = 0;
+	    eshift = 0.f;
+	    if (! ilschr) {
+		ilastm = ilast;
+		if (ifrstm > ilast) {
+		    ifrstm = *ilo;
+		}
+	    }
+	    goto L350;
+	} else {
+
+/*           Usual case: 3x3 or larger block, using Francis implicit */
+/*                       double-shift */
+
+/*                                    2 */
+/*           Eigenvalue equation is  w  - c w + d = 0, */
+
+/*                                         -1 2        -1 */
+/*           so compute 1st column of  (A B  )  - c A B   + d */
+/*           using the formula in QZIT (from EISPACK) */
+
+/*           We assume that the block is at least 3x3 */
+
+	    ad11 = ascale * h__[ilast - 1 + (ilast - 1) * h_dim1] / (bscale * 
+		    t[ilast - 1 + (ilast - 1) * t_dim1]);
+	    ad21 = ascale * h__[ilast + (ilast - 1) * h_dim1] / (bscale * t[
+		    ilast - 1 + (ilast - 1) * t_dim1]);
+	    ad12 = ascale * h__[ilast - 1 + ilast * h_dim1] / (bscale * t[
+		    ilast + ilast * t_dim1]);
+	    ad22 = ascale * h__[ilast + ilast * h_dim1] / (bscale * t[ilast + 
+		    ilast * t_dim1]);
+	    u12 = t[ilast - 1 + ilast * t_dim1] / t[ilast + ilast * t_dim1];
+	    ad11l = ascale * h__[ifirst + ifirst * h_dim1] / (bscale * t[
+		    ifirst + ifirst * t_dim1]);
+	    ad21l = ascale * h__[ifirst + 1 + ifirst * h_dim1] / (bscale * t[
+		    ifirst + ifirst * t_dim1]);
+	    ad12l = ascale * h__[ifirst + (ifirst + 1) * h_dim1] / (bscale * 
+		    t[ifirst + 1 + (ifirst + 1) * t_dim1]);
+	    ad22l = ascale * h__[ifirst + 1 + (ifirst + 1) * h_dim1] / (
+		    bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]);
+	    ad32l = ascale * h__[ifirst + 2 + (ifirst + 1) * h_dim1] / (
+		    bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]);
+	    u12l = t[ifirst + (ifirst + 1) * t_dim1] / t[ifirst + 1 + (ifirst 
+		    + 1) * t_dim1];
+
+	    v[0] = (ad11 - ad11l) * (ad22 - ad11l) - ad12 * ad21 + ad21 * u12 
+		    * ad11l + (ad12l - ad11l * u12l) * ad21l;
+	    v[1] = (ad22l - ad11l - ad21l * u12l - (ad11 - ad11l) - (ad22 - 
+		    ad11l) + ad21 * u12) * ad21l;
+	    v[2] = ad32l * ad21l;
+
+	    istart = ifirst;
+
+	    slarfg_(&c__3, v, &v[1], &c__1, &tau);
+	    v[0] = 1.f;
+
+/*           Sweep */
+
+	    i__2 = ilast - 2;
+	    for (j = istart; j <= i__2; ++j) {
+
+/*              All but last elements: use 3x3 Householder transforms. */
+
+/*              Zero (j-1)st column of A */
+
+		if (j > istart) {
+		    v[0] = h__[j + (j - 1) * h_dim1];
+		    v[1] = h__[j + 1 + (j - 1) * h_dim1];
+		    v[2] = h__[j + 2 + (j - 1) * h_dim1];
+
+		    slarfg_(&c__3, &h__[j + (j - 1) * h_dim1], &v[1], &c__1, &
+			    tau);
+		    v[0] = 1.f;
+		    h__[j + 1 + (j - 1) * h_dim1] = 0.f;
+		    h__[j + 2 + (j - 1) * h_dim1] = 0.f;
+		}
+
+		i__3 = ilastm;
+		for (jc = j; jc <= i__3; ++jc) {
+		    temp = tau * (h__[j + jc * h_dim1] + v[1] * h__[j + 1 + 
+			    jc * h_dim1] + v[2] * h__[j + 2 + jc * h_dim1]);
+		    h__[j + jc * h_dim1] -= temp;
+		    h__[j + 1 + jc * h_dim1] -= temp * v[1];
+		    h__[j + 2 + jc * h_dim1] -= temp * v[2];
+		    temp2 = tau * (t[j + jc * t_dim1] + v[1] * t[j + 1 + jc * 
+			    t_dim1] + v[2] * t[j + 2 + jc * t_dim1]);
+		    t[j + jc * t_dim1] -= temp2;
+		    t[j + 1 + jc * t_dim1] -= temp2 * v[1];
+		    t[j + 2 + jc * t_dim1] -= temp2 * v[2];
+/* L230: */
+		}
+		if (ilq) {
+		    i__3 = *n;
+		    for (jr = 1; jr <= i__3; ++jr) {
+			temp = tau * (q[jr + j * q_dim1] + v[1] * q[jr + (j + 
+				1) * q_dim1] + v[2] * q[jr + (j + 2) * q_dim1]
+				);
+			q[jr + j * q_dim1] -= temp;
+			q[jr + (j + 1) * q_dim1] -= temp * v[1];
+			q[jr + (j + 2) * q_dim1] -= temp * v[2];
+/* L240: */
+		    }
+		}
+
+/*              Zero j-th column of B (see SLAGBC for details) */
+
+/*              Swap rows to pivot */
+
+		ilpivt = FALSE_;
+/* Computing MAX */
+		r__3 = (r__1 = t[j + 1 + (j + 1) * t_dim1], abs(r__1)), r__4 =
+			 (r__2 = t[j + 1 + (j + 2) * t_dim1], abs(r__2));
+		temp = f2cmax(r__3,r__4);
+/* Computing MAX */
+		r__3 = (r__1 = t[j + 2 + (j + 1) * t_dim1], abs(r__1)), r__4 =
+			 (r__2 = t[j + 2 + (j + 2) * t_dim1], abs(r__2));
+		temp2 = f2cmax(r__3,r__4);
+		if (f2cmax(temp,temp2) < safmin) {
+		    scale = 0.f;
+		    u1 = 1.f;
+		    u2 = 0.f;
+		    goto L250;
+		} else if (temp >= temp2) {
+		    w11 = t[j + 1 + (j + 1) * t_dim1];
+		    w21 = t[j + 2 + (j + 1) * t_dim1];
+		    w12 = t[j + 1 + (j + 2) * t_dim1];
+		    w22 = t[j + 2 + (j + 2) * t_dim1];
+		    u1 = t[j + 1 + j * t_dim1];
+		    u2 = t[j + 2 + j * t_dim1];
+		} else {
+		    w21 = t[j + 1 + (j + 1) * t_dim1];
+		    w11 = t[j + 2 + (j + 1) * t_dim1];
+		    w22 = t[j + 1 + (j + 2) * t_dim1];
+		    w12 = t[j + 2 + (j + 2) * t_dim1];
+		    u2 = t[j + 1 + j * t_dim1];
+		    u1 = t[j + 2 + j * t_dim1];
+		}
+
+/*              Swap columns if nec. */
+
+		if (abs(w12) > abs(w11)) {
+		    ilpivt = TRUE_;
+		    temp = w12;
+		    temp2 = w22;
+		    w12 = w11;
+		    w22 = w21;
+		    w11 = temp;
+		    w21 = temp2;
+		}
+
+/*              LU-factor */
+
+		temp = w21 / w11;
+		u2 -= temp * u1;
+		w22 -= temp * w12;
+		w21 = 0.f;
+
+/*              Compute SCALE */
+
+		scale = 1.f;
+		if (abs(w22) < safmin) {
+		    scale = 0.f;
+		    u2 = 1.f;
+		    u1 = -w12 / w11;
+		    goto L250;
+		}
+		if (abs(w22) < abs(u2)) {
+		    scale = (r__1 = w22 / u2, abs(r__1));
+		}
+		if (abs(w11) < abs(u1)) {
+/* Computing MIN */
+		    r__2 = scale, r__3 = (r__1 = w11 / u1, abs(r__1));
+		    scale = f2cmin(r__2,r__3);
+		}
+
+/*              Solve */
+
+		u2 = scale * u2 / w22;
+		u1 = (scale * u1 - w12 * u2) / w11;
+
+L250:
+		if (ilpivt) {
+		    temp = u2;
+		    u2 = u1;
+		    u1 = temp;
+		}
+
+/*              Compute Householder Vector */
+
+/* Computing 2nd power */
+		r__1 = scale;
+/* Computing 2nd power */
+		r__2 = u1;
+/* Computing 2nd power */
+		r__3 = u2;
+		t1 = sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3);
+		tau = scale / t1 + 1.f;
+		vs = -1.f / (scale + t1);
+		v[0] = 1.f;
+		v[1] = vs * u1;
+		v[2] = vs * u2;
+
+/*              Apply transformations from the right. */
+
+/* Computing MIN */
+		i__4 = j + 3;
+		i__3 = f2cmin(i__4,ilast);
+		for (jr = ifrstm; jr <= i__3; ++jr) {
+		    temp = tau * (h__[jr + j * h_dim1] + v[1] * h__[jr + (j + 
+			    1) * h_dim1] + v[2] * h__[jr + (j + 2) * h_dim1]);
+		    h__[jr + j * h_dim1] -= temp;
+		    h__[jr + (j + 1) * h_dim1] -= temp * v[1];
+		    h__[jr + (j + 2) * h_dim1] -= temp * v[2];
+/* L260: */
+		}
+		i__3 = j + 2;
+		for (jr = ifrstm; jr <= i__3; ++jr) {
+		    temp = tau * (t[jr + j * t_dim1] + v[1] * t[jr + (j + 1) *
+			     t_dim1] + v[2] * t[jr + (j + 2) * t_dim1]);
+		    t[jr + j * t_dim1] -= temp;
+		    t[jr + (j + 1) * t_dim1] -= temp * v[1];
+		    t[jr + (j + 2) * t_dim1] -= temp * v[2];
+/* L270: */
+		}
+		if (ilz) {
+		    i__3 = *n;
+		    for (jr = 1; jr <= i__3; ++jr) {
+			temp = tau * (z__[jr + j * z_dim1] + v[1] * z__[jr + (
+				j + 1) * z_dim1] + v[2] * z__[jr + (j + 2) * 
+				z_dim1]);
+			z__[jr + j * z_dim1] -= temp;
+			z__[jr + (j + 1) * z_dim1] -= temp * v[1];
+			z__[jr + (j + 2) * z_dim1] -= temp * v[2];
+/* L280: */
+		    }
+		}
+		t[j + 1 + j * t_dim1] = 0.f;
+		t[j + 2 + j * t_dim1] = 0.f;
+/* L290: */
+	    }
+
+/*           Last elements: Use Givens rotations */
+
+/*           Rotations from the left */
+
+	    j = ilast - 1;
+	    temp = h__[j + (j - 1) * h_dim1];
+	    slartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[j + 
+		    (j - 1) * h_dim1]);
+	    h__[j + 1 + (j - 1) * h_dim1] = 0.f;
+
+	    i__2 = ilastm;
+	    for (jc = j; jc <= i__2; ++jc) {
+		temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * 
+			h_dim1];
+		h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * 
+			h__[j + 1 + jc * h_dim1];
+		h__[j + jc * h_dim1] = temp;
+		temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1];
+		t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j 
+			+ 1 + jc * t_dim1];
+		t[j + jc * t_dim1] = temp2;
+/* L300: */
+	    }
+	    if (ilq) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * 
+			    q_dim1];
+		    q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ *
+			     q[jr + (j + 1) * q_dim1];
+		    q[jr + j * q_dim1] = temp;
+/* L310: */
+		}
+	    }
+
+/*           Rotations from the right. */
+
+	    temp = t[j + 1 + (j + 1) * t_dim1];
+	    slartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + 
+		    1) * t_dim1]);
+	    t[j + 1 + j * t_dim1] = 0.f;
+
+	    i__2 = ilast;
+	    for (jr = ifrstm; jr <= i__2; ++jr) {
+		temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * 
+			h_dim1];
+		h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ *
+			 h__[jr + j * h_dim1];
+		h__[jr + (j + 1) * h_dim1] = temp;
+/* L320: */
+	    }
+	    i__2 = ilast - 1;
+	    for (jr = ifrstm; jr <= i__2; ++jr) {
+		temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1]
+			;
+		t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[
+			jr + j * t_dim1];
+		t[jr + (j + 1) * t_dim1] = temp;
+/* L330: */
+	    }
+	    if (ilz) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    temp = c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j *
+			     z_dim1];
+		    z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + 
+			    c__ * z__[jr + j * z_dim1];
+		    z__[jr + (j + 1) * z_dim1] = temp;
+/* L340: */
+		}
+	    }
+
+/*           End of Double-Shift code */
+
+	}
+
+	goto L350;
+
+/*        End of iteration loop */
+
+L350:
+/* L360: */
+	;
+    }
+
+/*     Drop-through = non-convergence */
+
+    *info = ilast;
+    goto L420;
+
+/*     Successful completion of all QZ steps */
+
+L380:
+
+/*     Set Eigenvalues 1:ILO-1 */
+
+    i__1 = *ilo - 1;
+    for (j = 1; j <= i__1; ++j) {
+	if (t[j + j * t_dim1] < 0.f) {
+	    if (ilschr) {
+		i__2 = j;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    h__[jr + j * h_dim1] = -h__[jr + j * h_dim1];
+		    t[jr + j * t_dim1] = -t[jr + j * t_dim1];
+/* L390: */
+		}
+	    } else {
+		h__[j + j * h_dim1] = -h__[j + j * h_dim1];
+		t[j + j * t_dim1] = -t[j + j * t_dim1];
+	    }
+	    if (ilz) {
+		i__2 = *n;
+		for (jr = 1; jr <= i__2; ++jr) {
+		    z__[jr + j * z_dim1] = -z__[jr + j * z_dim1];
+/* L400: */
+		}
+	    }
+	}
+	alphar[j] = h__[j + j * h_dim1];
+	alphai[j] = 0.f;
+	beta[j] = t[j + j * t_dim1];
+/* L410: */
+    }
+
+/*     Normal Termination */
+
+    *info = 0;
+
+/*     Exit (other than argument error) -- return optimal workspace size */
+
+L420:
+    work[1] = (real) (*n);
+    return 0;
+
+/*     End of SHGEQZ */
+
+} /* shgeqz_ */
+
diff --git a/lapack-netlib/SRC/shsein.c b/lapack-netlib/SRC/shsein.c
new file mode 100644
index 000000000..f594e5089
--- /dev/null
+++ b/lapack-netlib/SRC/shsein.c
@@ -0,0 +1,968 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static logical c_false = FALSE_;
+static logical c_true = TRUE_;
+
+/* > \brief \b SHSEIN */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SHSEIN + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/shsein.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/shsein.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/shsein.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, */
+/*                          VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, */
+/*                          IFAILR, INFO ) */
+
+/*       CHARACTER          EIGSRC, INITV, SIDE */
+/*       INTEGER            INFO, LDH, LDVL, LDVR, M, MM, N */
+/*       LOGICAL            SELECT( * ) */
+/*       INTEGER            IFAILL( * ), IFAILR( * ) */
+/*       REAL               H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), */
+/*      $                   WI( * ), WORK( * ), WR( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SHSEIN uses inverse iteration to find specified right and/or left */
+/* > eigenvectors of a real upper Hessenberg matrix H. */
+/* > */
+/* > The right eigenvector x and the left eigenvector y of the matrix H */
+/* > corresponding to an eigenvalue w are defined by: */
+/* > */
+/* >              H * x = w * x,     y**h * H = w * y**h */
+/* > */
+/* > where y**h denotes the conjugate transpose of the vector y. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'R': compute right eigenvectors only; */
+/* >          = 'L': compute left eigenvectors only; */
+/* >          = 'B': compute both right and left eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] EIGSRC */
+/* > \verbatim */
+/* >          EIGSRC is CHARACTER*1 */
+/* >          Specifies the source of eigenvalues supplied in (WR,WI): */
+/* >          = 'Q': the eigenvalues were found using SHSEQR; thus, if */
+/* >                 H has zero subdiagonal elements, and so is */
+/* >                 block-triangular, then the j-th eigenvalue can be */
+/* >                 assumed to be an eigenvalue of the block containing */
+/* >                 the j-th row/column.  This property allows SHSEIN to */
+/* >                 perform inverse iteration on just one diagonal block. */
+/* >          = 'N': no assumptions are made on the correspondence */
+/* >                 between eigenvalues and diagonal blocks.  In this */
+/* >                 case, SHSEIN must always perform inverse iteration */
+/* >                 using the whole matrix H. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] INITV */
+/* > \verbatim */
+/* >          INITV is CHARACTER*1 */
+/* >          = 'N': no initial vectors are supplied; */
+/* >          = 'U': user-supplied initial vectors are stored in the arrays */
+/* >                 VL and/or VR. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] SELECT */
+/* > \verbatim */
+/* >          SELECT is LOGICAL array, dimension (N) */
+/* >          Specifies the eigenvectors to be computed. To select the */
+/* >          real eigenvector corresponding to a real eigenvalue WR(j), */
+/* >          SELECT(j) must be set to .TRUE.. To select the complex */
+/* >          eigenvector corresponding to a complex eigenvalue */
+/* >          (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), */
+/* >          either SELECT(j) or SELECT(j+1) or both must be set to */
+/* >          .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is */
+/* >          .FALSE.. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix H.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] H */
+/* > \verbatim */
+/* >          H is REAL array, dimension (LDH,N) */
+/* >          The upper Hessenberg matrix H. */
+/* >          If a NaN is detected in H, the routine will return with INFO=-6. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDH */
+/* > \verbatim */
+/* >          LDH is INTEGER */
+/* >          The leading dimension of the array H.  LDH >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] WR */
+/* > \verbatim */
+/* >          WR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] WI */
+/* > \verbatim */
+/* >          WI is REAL array, dimension (N) */
+/* > */
+/* >          On entry, the real and imaginary parts of the eigenvalues of */
+/* >          H; a complex conjugate pair of eigenvalues must be stored in */
+/* >          consecutive elements of WR and WI. */
+/* >          On exit, WR may have been altered since close eigenvalues */
+/* >          are perturbed slightly in searching for independent */
+/* >          eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] VL */
+/* > \verbatim */
+/* >          VL is REAL array, dimension (LDVL,MM) */
+/* >          On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must */
+/* >          contain starting vectors for the inverse iteration for the */
+/* >          left eigenvectors; the starting vector for each eigenvector */
+/* >          must be in the same column(s) in which the eigenvector will */
+/* >          be stored. */
+/* >          On exit, if SIDE = 'L' or 'B', the left eigenvectors */
+/* >          specified by SELECT will be stored consecutively in the */
+/* >          columns of VL, in the same order as their eigenvalues. A */
+/* >          complex eigenvector corresponding to a complex eigenvalue is */
+/* >          stored in two consecutive columns, the first holding the real */
+/* >          part and the second the imaginary part. */
+/* >          If SIDE = 'R', VL is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the array VL. */
+/* >          LDVL >= f2cmax(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] VR */
+/* > \verbatim */
+/* >          VR is REAL array, dimension (LDVR,MM) */
+/* >          On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must */
+/* >          contain starting vectors for the inverse iteration for the */
+/* >          right eigenvectors; the starting vector for each eigenvector */
+/* >          must be in the same column(s) in which the eigenvector will */
+/* >          be stored. */
+/* >          On exit, if SIDE = 'R' or 'B', the right eigenvectors */
+/* >          specified by SELECT will be stored consecutively in the */
+/* >          columns of VR, in the same order as their eigenvalues. A */
+/* >          complex eigenvector corresponding to a complex eigenvalue is */
+/* >          stored in two consecutive columns, the first holding the real */
+/* >          part and the second the imaginary part. */
+/* >          If SIDE = 'L', VR is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the array VR. */
+/* >          LDVR >= f2cmax(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MM */
+/* > \verbatim */
+/* >          MM is INTEGER */
+/* >          The number of columns in the arrays VL and/or VR. MM >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of columns in the arrays VL and/or VR required to */
+/* >          store the eigenvectors; each selected real eigenvector */
+/* >          occupies one column and each selected complex eigenvector */
+/* >          occupies two columns. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension ((N+2)*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IFAILL */
+/* > \verbatim */
+/* >          IFAILL is INTEGER array, dimension (MM) */
+/* >          If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left */
+/* >          eigenvector in the i-th column of VL (corresponding to the */
+/* >          eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the */
+/* >          eigenvector converged satisfactorily. If the i-th and (i+1)th */
+/* >          columns of VL hold a complex eigenvector, then IFAILL(i) and */
+/* >          IFAILL(i+1) are set to the same value. */
+/* >          If SIDE = 'R', IFAILL is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IFAILR */
+/* > \verbatim */
+/* >          IFAILR is INTEGER array, dimension (MM) */
+/* >          If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right */
+/* >          eigenvector in the i-th column of VR (corresponding to the */
+/* >          eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the */
+/* >          eigenvector converged satisfactorily. If the i-th and (i+1)th */
+/* >          columns of VR hold a complex eigenvector, then IFAILR(i) and */
+/* >          IFAILR(i+1) are set to the same value. */
+/* >          If SIDE = 'L', IFAILR is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, i is the number of eigenvectors which */
+/* >                failed to converge; see IFAILL and IFAILR for further */
+/* >                details. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  Each eigenvector is normalized so that the element of largest */
+/* >  magnitude has magnitude 1; here the magnitude of a complex number */
+/* >  (x,y) is taken to be |x|+|y|. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int shsein_(char *side, char *eigsrc, char *initv, logical *
+	select, integer *n, real *h__, integer *ldh, real *wr, real *wi, real 
+	*vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, 
+	real *work, integer *ifaill, integer *ifailr, integer *info)
+{
+    /* System generated locals */
+    integer h_dim1, h_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
+	    i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    logical pair;
+    real unfl;
+    integer i__, k;
+    extern logical lsame_(char *, char *);
+    integer iinfo;
+    logical leftv, bothv;
+    real hnorm;
+    integer kl, kr;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int slaein_(logical *, logical *, integer *, real 
+	    *, integer *, real *, real *, real *, real *, real *, integer *, 
+	    real *, real *, real *, real *, integer *), xerbla_(char *, 
+	    integer *, ftnlen);
+    real bignum;
+    extern real slanhs_(char *, integer *, real *, integer *, real *);
+    extern logical sisnan_(real *);
+    logical noinit;
+    integer ldwork;
+    logical rightv, fromqr;
+    real smlnum;
+    integer kln, ksi;
+    real wki;
+    integer ksr;
+    real ulp, wkr, eps3;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode and test the input parameters. */
+
+    /* Parameter adjustments */
+    --select;
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1 * 1;
+    h__ -= h_offset;
+    --wr;
+    --wi;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --work;
+    --ifaill;
+    --ifailr;
+
+    /* Function Body */
+    bothv = lsame_(side, "B");
+    rightv = lsame_(side, "R") || bothv;
+    leftv = lsame_(side, "L") || bothv;
+
+    fromqr = lsame_(eigsrc, "Q");
+
+    noinit = lsame_(initv, "N");
+
+/*     Set M to the number of columns required to store the selected */
+/*     eigenvectors, and standardize the array SELECT. */
+
+    *m = 0;
+    pair = FALSE_;
+    i__1 = *n;
+    for (k = 1; k <= i__1; ++k) {
+	if (pair) {
+	    pair = FALSE_;
+	    select[k] = FALSE_;
+	} else {
+	    if (wi[k] == 0.f) {
+		if (select[k]) {
+		    ++(*m);
+		}
+	    } else {
+		pair = TRUE_;
+		if (select[k] || select[k + 1]) {
+		    select[k] = TRUE_;
+		    *m += 2;
+		}
+	    }
+	}
+/* L10: */
+    }
+
+    *info = 0;
+    if (! rightv && ! leftv) {
+	*info = -1;
+    } else if (! fromqr && ! lsame_(eigsrc, "N")) {
+	*info = -2;
+    } else if (! noinit && ! lsame_(initv, "U")) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*ldh < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldvl < 1 || leftv && *ldvl < *n) {
+	*info = -11;
+    } else if (*ldvr < 1 || rightv && *ldvr < *n) {
+	*info = -13;
+    } else if (*mm < *m) {
+	*info = -14;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SHSEIN", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Set machine-dependent constants. */
+
+    unfl = slamch_("Safe minimum");
+    ulp = slamch_("Precision");
+    smlnum = unfl * (*n / ulp);
+    bignum = (1.f - ulp) / smlnum;
+
+    ldwork = *n + 1;
+
+    kl = 1;
+    kln = 0;
+    if (fromqr) {
+	kr = 0;
+    } else {
+	kr = *n;
+    }
+    ksr = 1;
+
+    i__1 = *n;
+    for (k = 1; k <= i__1; ++k) {
+	if (select[k]) {
+
+/*           Compute eigenvector(s) corresponding to W(K). */
+
+	    if (fromqr) {
+
+/*              If affiliation of eigenvalues is known, check whether */
+/*              the matrix splits. */
+
+/*              Determine KL and KR such that 1 <= KL <= K <= KR <= N */
+/*              and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or */
+/*              KR = N). */
+
+/*              Then inverse iteration can be performed with the */
+/*              submatrix H(KL:N,KL:N) for a left eigenvector, and with */
+/*              the submatrix H(1:KR,1:KR) for a right eigenvector. */
+
+		i__2 = kl + 1;
+		for (i__ = k; i__ >= i__2; --i__) {
+		    if (h__[i__ + (i__ - 1) * h_dim1] == 0.f) {
+			goto L30;
+		    }
+/* L20: */
+		}
+L30:
+		kl = i__;
+		if (k > kr) {
+		    i__2 = *n - 1;
+		    for (i__ = k; i__ <= i__2; ++i__) {
+			if (h__[i__ + 1 + i__ * h_dim1] == 0.f) {
+			    goto L50;
+			}
+/* L40: */
+		    }
+L50:
+		    kr = i__;
+		}
+	    }
+
+	    if (kl != kln) {
+		kln = kl;
+
+/*              Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it */
+/*              has not ben computed before. */
+
+		i__2 = kr - kl + 1;
+		hnorm = slanhs_("I", &i__2, &h__[kl + kl * h_dim1], ldh, &
+			work[1]);
+		if (sisnan_(&hnorm)) {
+		    *info = -6;
+		    return 0;
+		} else if (hnorm > 0.f) {
+		    eps3 = hnorm * ulp;
+		} else {
+		    eps3 = smlnum;
+		}
+	    }
+
+/*           Perturb eigenvalue if it is close to any previous */
+/*           selected eigenvalues affiliated to the submatrix */
+/*           H(KL:KR,KL:KR). Close roots are modified by EPS3. */
+
+	    wkr = wr[k];
+	    wki = wi[k];
+L60:
+	    i__2 = kl;
+	    for (i__ = k - 1; i__ >= i__2; --i__) {
+		if (select[i__] && (r__1 = wr[i__] - wkr, abs(r__1)) + (r__2 =
+			 wi[i__] - wki, abs(r__2)) < eps3) {
+		    wkr += eps3;
+		    goto L60;
+		}
+/* L70: */
+	    }
+	    wr[k] = wkr;
+
+	    pair = wki != 0.f;
+	    if (pair) {
+		ksi = ksr + 1;
+	    } else {
+		ksi = ksr;
+	    }
+	    if (leftv) {
+
+/*              Compute left eigenvector. */
+
+		i__2 = *n - kl + 1;
+		slaein_(&c_false, &noinit, &i__2, &h__[kl + kl * h_dim1], ldh,
+			 &wkr, &wki, &vl[kl + ksr * vl_dim1], &vl[kl + ksi * 
+			vl_dim1], &work[1], &ldwork, &work[*n * *n + *n + 1], 
+			&eps3, &smlnum, &bignum, &iinfo);
+		if (iinfo > 0) {
+		    if (pair) {
+			*info += 2;
+		    } else {
+			++(*info);
+		    }
+		    ifaill[ksr] = k;
+		    ifaill[ksi] = k;
+		} else {
+		    ifaill[ksr] = 0;
+		    ifaill[ksi] = 0;
+		}
+		i__2 = kl - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    vl[i__ + ksr * vl_dim1] = 0.f;
+/* L80: */
+		}
+		if (pair) {
+		    i__2 = kl - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			vl[i__ + ksi * vl_dim1] = 0.f;
+/* L90: */
+		    }
+		}
+	    }
+	    if (rightv) {
+
+/*              Compute right eigenvector. */
+
+		slaein_(&c_true, &noinit, &kr, &h__[h_offset], ldh, &wkr, &
+			wki, &vr[ksr * vr_dim1 + 1], &vr[ksi * vr_dim1 + 1], &
+			work[1], &ldwork, &work[*n * *n + *n + 1], &eps3, &
+			smlnum, &bignum, &iinfo);
+		if (iinfo > 0) {
+		    if (pair) {
+			*info += 2;
+		    } else {
+			++(*info);
+		    }
+		    ifailr[ksr] = k;
+		    ifailr[ksi] = k;
+		} else {
+		    ifailr[ksr] = 0;
+		    ifailr[ksi] = 0;
+		}
+		i__2 = *n;
+		for (i__ = kr + 1; i__ <= i__2; ++i__) {
+		    vr[i__ + ksr * vr_dim1] = 0.f;
+/* L100: */
+		}
+		if (pair) {
+		    i__2 = *n;
+		    for (i__ = kr + 1; i__ <= i__2; ++i__) {
+			vr[i__ + ksi * vr_dim1] = 0.f;
+/* L110: */
+		    }
+		}
+	    }
+
+	    if (pair) {
+		ksr += 2;
+	    } else {
+		++ksr;
+	    }
+	}
+/* L120: */
+    }
+
+    return 0;
+
+/*     End of SHSEIN */
+
+} /* shsein_ */
+
diff --git a/lapack-netlib/SRC/shseqr.c b/lapack-netlib/SRC/shseqr.c
new file mode 100644
index 000000000..dccfeefcf
--- /dev/null
+++ b/lapack-netlib/SRC/shseqr.c
@@ -0,0 +1,941 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static real c_b11 = 0.f;
+static real c_b12 = 1.f;
+static integer c__12 = 12;
+static integer c__2 = 2;
+static integer c__49 = 49;
+
+/* > \brief \b SHSEQR */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SHSEQR + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/shseqr.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/shseqr.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/shseqr.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, */
+/*                          LDZ, WORK, LWORK, INFO ) */
+
+/*       INTEGER            IHI, ILO, INFO, LDH, LDZ, LWORK, N */
+/*       CHARACTER          COMPZ, JOB */
+/*       REAL               H( LDH, * ), WI( * ), WORK( * ), WR( * ), */
+/*      $                   Z( LDZ, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    SHSEQR computes the eigenvalues of a Hessenberg matrix H */
+/* >    and, optionally, the matrices T and Z from the Schur decomposition */
+/* >    H = Z T Z**T, where T is an upper quasi-triangular matrix (the */
+/* >    Schur form), and Z is the orthogonal matrix of Schur vectors. */
+/* > */
+/* >    Optionally Z may be postmultiplied into an input orthogonal */
+/* >    matrix Q so that this routine can give the Schur factorization */
+/* >    of a matrix A which has been reduced to the Hessenberg form H */
+/* >    by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >           = 'E':  compute eigenvalues only; */
+/* >           = 'S':  compute eigenvalues and the Schur form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COMPZ */
+/* > \verbatim */
+/* >          COMPZ is CHARACTER*1 */
+/* >           = 'N':  no Schur vectors are computed; */
+/* >           = 'I':  Z is initialized to the unit matrix and the matrix Z */
+/* >                   of Schur vectors of H is returned; */
+/* >           = 'V':  Z must contain an orthogonal matrix Q on entry, and */
+/* >                   the product Q*Z is returned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >           The order of the matrix H.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* > */
+/* >           It is assumed that H is already upper triangular in rows */
+/* >           and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
+/* >           set by a previous call to SGEBAL, and then passed to ZGEHRD */
+/* >           when the matrix output by SGEBAL is reduced to Hessenberg */
+/* >           form. Otherwise ILO and IHI should be set to 1 and N */
+/* >           respectively.  If N > 0, then 1 <= ILO <= IHI <= N. */
+/* >           If N = 0, then ILO = 1 and IHI = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] H */
+/* > \verbatim */
+/* >          H is REAL array, dimension (LDH,N) */
+/* >           On entry, the upper Hessenberg matrix H. */
+/* >           On exit, if INFO = 0 and JOB = 'S', then H contains the */
+/* >           upper quasi-triangular matrix T from the Schur decomposition */
+/* >           (the Schur form); 2-by-2 diagonal blocks (corresponding to */
+/* >           complex conjugate pairs of eigenvalues) are returned in */
+/* >           standard form, with H(i,i) = H(i+1,i+1) and */
+/* >           H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and JOB = 'E', the */
+/* >           contents of H are unspecified on exit.  (The output value of */
+/* >           H when INFO > 0 is given under the description of INFO */
+/* >           below.) */
+/* > */
+/* >           Unlike earlier versions of SHSEQR, this subroutine may */
+/* >           explicitly H(i,j) = 0 for i > j and j = 1, 2, ... ILO-1 */
+/* >           or j = IHI+1, IHI+2, ... N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDH */
+/* > \verbatim */
+/* >          LDH is INTEGER */
+/* >           The leading dimension of the array H. LDH >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is REAL array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is REAL array, dimension (N) */
+/* > */
+/* >           The real and imaginary parts, respectively, of the computed */
+/* >           eigenvalues. If two eigenvalues are computed as a complex */
+/* >           conjugate pair, they are stored in consecutive elements of */
+/* >           WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and */
+/* >           WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in */
+/* >           the same order as on the diagonal of the Schur form returned */
+/* >           in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 */
+/* >           diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */
+/* >           WI(i+1) = -WI(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Z */
+/* > \verbatim */
+/* >          Z is REAL array, dimension (LDZ,N) */
+/* >           If COMPZ = 'N', Z is not referenced. */
+/* >           If COMPZ = 'I', on entry Z need not be set and on exit, */
+/* >           if INFO = 0, Z contains the orthogonal matrix Z of the Schur */
+/* >           vectors of H.  If COMPZ = 'V', on entry Z must contain an */
+/* >           N-by-N matrix Q, which is assumed to be equal to the unit */
+/* >           matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, */
+/* >           if INFO = 0, Z contains Q*Z. */
+/* >           Normally Q is the orthogonal matrix generated by SORGHR */
+/* >           after the call to SGEHRD which formed the Hessenberg matrix */
+/* >           H. (The output value of Z when INFO > 0 is given under */
+/* >           the description of INFO below.) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDZ */
+/* > \verbatim */
+/* >          LDZ is INTEGER */
+/* >           The leading dimension of the array Z.  if COMPZ = 'I' or */
+/* >           COMPZ = 'V', then LDZ >= MAX(1,N).  Otherwise, LDZ >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (LWORK) */
+/* >           On exit, if INFO = 0, WORK(1) returns an estimate of */
+/* >           the optimal value for LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >           The dimension of the array WORK.  LWORK >= f2cmax(1,N) */
+/* >           is sufficient and delivers very good and sometimes */
+/* >           optimal performance.  However, LWORK as large as 11*N */
+/* >           may be required for optimal performance.  A workspace */
+/* >           query is recommended to determine the optimal workspace */
+/* >           size. */
+/* > */
+/* >           If LWORK = -1, then SHSEQR does a workspace query. */
+/* >           In this case, SHSEQR checks the input parameters and */
+/* >           estimates the optimal workspace size for the given */
+/* >           values of N, ILO and IHI.  The estimate is returned */
+/* >           in WORK(1).  No error message related to LWORK is */
+/* >           issued by XERBLA.  Neither H nor Z are accessed. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >             = 0:  successful exit */
+/* >             < 0:  if INFO = -i, the i-th argument had an illegal */
+/* >                    value */
+/* >             > 0:  if INFO = i, SHSEQR failed to compute all of */
+/* >                the eigenvalues.  Elements 1:ilo-1 and i+1:n of WR */
+/* >                and WI contain those eigenvalues which have been */
+/* >                successfully computed.  (Failures are rare.) */
+/* > */
+/* >                If INFO > 0 and JOB = 'E', then on exit, the */
+/* >                remaining unconverged eigenvalues are the eigen- */
+/* >                values of the upper Hessenberg matrix rows and */
+/* >                columns ILO through INFO of the final, output */
+/* >                value of H. */
+/* > */
+/* >                If INFO > 0 and JOB   = 'S', then on exit */
+/* > */
+/* >           (*)  (initial value of H)*U  = U*(final value of H) */
+/* > */
+/* >                where U is an orthogonal matrix.  The final */
+/* >                value of H is upper Hessenberg and quasi-triangular */
+/* >                in rows and columns INFO+1 through IHI. */
+/* > */
+/* >                If INFO > 0 and COMPZ = 'V', then on exit */
+/* > */
+/* >                  (final value of Z)  =  (initial value of Z)*U */
+/* > */
+/* >                where U is the orthogonal matrix in (*) (regard- */
+/* >                less of the value of JOB.) */
+/* > */
+/* >                If INFO > 0 and COMPZ = 'I', then on exit */
+/* >                      (final value of Z)  = U */
+/* >                where U is the orthogonal matrix in (*) (regard- */
+/* >                less of the value of JOB.) */
+/* > */
+/* >                If INFO > 0 and COMPZ = 'N', then Z is not */
+/* >                accessed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realOTHERcomputational */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >       Karen Braman and Ralph Byers, Department of Mathematics, */
+/* >       University of Kansas, USA */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >             Default values supplied by */
+/* >             ILAENV(ISPEC,'SHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). */
+/* >             It is suggested that these defaults be adjusted in order */
+/* >             to attain best performance in each particular */
+/* >             computational environment. */
+/* > */
+/* >            ISPEC=12: The SLAHQR vs SLAQR0 crossover point. */
+/* >                      Default: 75. (Must be at least 11.) */
+/* > */
+/* >            ISPEC=13: Recommended deflation window size. */
+/* >                      This depends on ILO, IHI and NS.  NS is the */
+/* >                      number of simultaneous shifts returned */
+/* >                      by ILAENV(ISPEC=15).  (See ISPEC=15 below.) */
+/* >                      The default for (IHI-ILO+1) <= 500 is NS. */
+/* >                      The default for (IHI-ILO+1) >  500 is 3*NS/2. */
+/* > */
+/* >            ISPEC=14: Nibble crossover point. (See IPARMQ for */
+/* >                      details.)  Default: 14% of deflation window */
+/* >                      size. */
+/* > */
+/* >            ISPEC=15: Number of simultaneous shifts in a multishift */
+/* >                      QR iteration. */
+/* > */
+/* >                      If IHI-ILO+1 is ... */
+/* > */
+/* >                      greater than      ...but less    ... the */
+/* >                      or equal to ...      than        default is */
+/* > */
+/* >                           1               30          NS =   2(+) */
+/* >                          30               60          NS =   4(+) */
+/* >                          60              150          NS =  10(+) */
+/* >                         150              590          NS =  ** */
+/* >                         590             3000          NS =  64 */
+/* >                        3000             6000          NS = 128 */
+/* >                        6000             infinity      NS = 256 */
+/* > */
+/* >                  (+)  By default some or all matrices of this order */
+/* >                       are passed to the implicit double shift routine */
+/* >                       SLAHQR and this parameter is ignored.  See */
+/* >                       ISPEC=12 above and comments in IPARMQ for */
+/* >                       details. */
+/* > */
+/* >                 (**)  The asterisks (**) indicate an ad-hoc */
+/* >                       function of N increasing from 10 to 64. */
+/* > */
+/* >            ISPEC=16: Select structured matrix multiply. */
+/* >                      If the number of simultaneous shifts (specified */
+/* >                      by ISPEC=15) is less than 14, then the default */
+/* >                      for ISPEC=16 is 0.  Otherwise the default for */
+/* >                      ISPEC=16 is 2. */
+/* > \endverbatim */
+
+/* > \par References: */
+/*  ================ */
+/* > */
+/* >       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */
+/* >       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */
+/* >       Performance, SIAM Journal of Matrix Analysis, volume 23, pages */
+/* >       929--947, 2002. */
+/* > \n */
+/* >       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */
+/* >       Algorithm Part II: Aggressive Early Deflation, SIAM Journal */
+/* >       of Matrix Analysis, volume 23, pages 948--973, 2002. */
+
+/*  ===================================================================== */
+/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo,
+	 integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__,
+	 integer *ldz, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3;
+    real r__1;
+    char ch__1[2];
+
+    /* Local variables */
+    integer kbot, nmin, i__;
+    extern logical lsame_(char *, char *);
+    logical initz;
+    real workl[49];
+    logical wantt, wantz;
+    extern /* Subroutine */ int slaqr0_(logical *, logical *, integer *, 
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , integer *, real *, integer *, real *, integer *, integer *);
+    real hl[2401]	/* was [49][49] */;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, 
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , integer *, real *, integer *, integer *), slacpy_(char *, 
+	    integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
+	    real *, integer *);
+    logical lquery;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     ==== Matrices of order NTINY or smaller must be processed by */
+/*     .    SLAHQR because of insufficient subdiagonal scratch space. */
+/*     .    (This is a hard limit.) ==== */
+
+/*     ==== NL allocates some local workspace to help small matrices */
+/*     .    through a rare SLAHQR failure.  NL > NTINY = 15 is */
+/*     .    required and NL <= NMIN = ILAENV(ISPEC=12,...) is recom- */
+/*     .    mended.  (The default value of NMIN is 75.)  Using NL = 49 */
+/*     .    allows up to six simultaneous shifts and a 16-by-16 */
+/*     .    deflation window.  ==== */
+
+/*     ==== Decode and check the input parameters. ==== */
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1 * 1;
+    h__ -= h_offset;
+    --wr;
+    --wi;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    wantt = lsame_(job, "S");
+    initz = lsame_(compz, "I");
+    wantz = initz || lsame_(compz, "V");
+    work[1] = (real) f2cmax(1,*n);
+    lquery = *lwork == -1;
+
+    *info = 0;
+    if (! lsame_(job, "E") && ! wantt) {
+	*info = -1;
+    } else if (! lsame_(compz, "N") && ! wantz) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
+	*info = -5;
+    } else if (*ldh < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldz < 1 || wantz && *ldz < f2cmax(1,*n)) {
+	*info = -11;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -13;
+    }
+
+    if (*info != 0) {
+
+/*        ==== Quick return in case of invalid argument. ==== */
+
+	i__1 = -(*info);
+	xerbla_("SHSEQR", &i__1, (ftnlen)6);
+	return 0;
+
+    } else if (*n == 0) {
+
+/*        ==== Quick return in case N = 0; nothing to do. ==== */
+
+	return 0;
+
+    } else if (lquery) {
+
+/*        ==== Quick return in case of a workspace query ==== */
+
+	slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[
+		1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info);
+/*        ==== Ensure reported workspace size is backward-compatible with */
+/*        .    previous LAPACK versions. ==== */
+/* Computing MAX */
+	r__1 = (real) f2cmax(1,*n);
+	work[1] = f2cmax(r__1,work[1]);
+	return 0;
+
+    } else {
+
+/*        ==== copy eigenvalues isolated by SGEBAL ==== */
+
+	i__1 = *ilo - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    wr[i__] = h__[i__ + i__ * h_dim1];
+	    wi[i__] = 0.f;
+/* L10: */
+	}
+	i__1 = *n;
+	for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
+	    wr[i__] = h__[i__ + i__ * h_dim1];
+	    wi[i__] = 0.f;
+/* L20: */
+	}
+
+/*        ==== Initialize Z, if requested ==== */
+
+	if (initz) {
+	    slaset_("A", n, n, &c_b11, &c_b12, &z__[z_offset], ldz)
+		    ;
+	}
+
+/*        ==== Quick return if possible ==== */
+
+	if (*ilo == *ihi) {
+	    wr[*ilo] = h__[*ilo + *ilo * h_dim1];
+	    wi[*ilo] = 0.f;
+	    return 0;
+	}
+
+/*        ==== SLAHQR/SLAQR0 crossover point ==== */
+
+/* Writing concatenation */
+	i__2[0] = 1, a__1[0] = job;
+	i__2[1] = 1, a__1[1] = compz;
+	s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
+	nmin = ilaenv_(&c__12, "SHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6,
+		 (ftnlen)2);
+	nmin = f2cmax(15,nmin);
+
+/*        ==== SLAQR0 for big matrices; SLAHQR for small ones ==== */
+
+	if (*n > nmin) {
+	    slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], 
+		    &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, 
+		    info);
+	} else {
+
+/*           ==== Small matrix ==== */
+
+	    slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], 
+		    &wi[1], ilo, ihi, &z__[z_offset], ldz, info);
+
+	    if (*info > 0) {
+
+/*              ==== A rare SLAHQR failure!  SLAQR0 sometimes succeeds */
+/*              .    when SLAHQR fails. ==== */
+
+		kbot = *info;
+
+		if (*n >= 49) {
+
+/*                 ==== Larger matrices have enough subdiagonal scratch */
+/*                 .    space to call SLAQR0 directly. ==== */
+
+		    slaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], 
+			    ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], 
+			    ldz, &work[1], lwork, info);
+
+		} else {
+
+/*                 ==== Tiny matrices don't have enough subdiagonal */
+/*                 .    scratch space to benefit from SLAQR0.  Hence, */
+/*                 .    tiny matrices must be copied into a larger */
+/*                 .    array before calling SLAQR0. ==== */
+
+		    slacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49);
+		    hl[*n + 1 + *n * 49 - 50] = 0.f;
+		    i__1 = 49 - *n;
+		    slaset_("A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) *
+			     49 - 49], &c__49);
+		    slaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, &
+			    wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, 
+			    workl, &c__49, info);
+		    if (wantt || *info != 0) {
+			slacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh);
+		    }
+		}
+	    }
+	}
+
+/*        ==== Clear out the trash, if necessary. ==== */
+
+	if ((wantt || *info != 0) && *n > 2) {
+	    i__1 = *n - 2;
+	    i__3 = *n - 2;
+	    slaset_("L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh);
+	}
+
+/*        ==== Ensure reported workspace size is backward-compatible with */
+/*        .    previous LAPACK versions. ==== */
+
+/* Computing MAX */
+	r__1 = (real) f2cmax(1,*n);
+	work[1] = f2cmax(r__1,work[1]);
+    }
+
+/*     ==== End of SHSEQR ==== */
+
+    return 0;
+} /* shseqr_ */
+
diff --git a/lapack-netlib/SRC/sisnan.c b/lapack-netlib/SRC/sisnan.c
new file mode 100644
index 000000000..689e199fe
--- /dev/null
+++ b/lapack-netlib/SRC/sisnan.c
@@ -0,0 +1,469 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SISNAN tests input for NaN. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SISNAN + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sisnan.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sisnan.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sisnan.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       LOGICAL FUNCTION SISNAN( SIN ) */
+
+/*       REAL SIN */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SISNAN returns .TRUE. if its argument is NaN, and .FALSE. */
+/* > otherwise.  To be replaced by the Fortran 2003 intrinsic in the */
+/* > future. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIN */
+/* > \verbatim */
+/* >          SIN is REAL */
+/* >          Input to test for NaN. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup OTHERauxiliary */
+
+/*  ===================================================================== */
+logical sisnan_(real *sin__)
+{
+    /* System generated locals */
+    logical ret_val;
+
+    /* Local variables */
+    extern logical slaisnan_(real *, real *);
+
+
+/*  -- LAPACK auxiliary routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+    ret_val = slaisnan_(sin__, sin__);
+    return ret_val;
+} /* sisnan_ */
+
diff --git a/lapack-netlib/SRC/sla_gbamv.c b/lapack-netlib/SRC/sla_gbamv.c
new file mode 100644
index 000000000..4cc9a4596
--- /dev/null
+++ b/lapack-netlib/SRC/sla_gbamv.c
@@ -0,0 +1,815 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SLA_GBAMV performs a matrix-vector operation to calculate error bounds. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GBAMV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gba
+mv.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gba
+mv.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gba
+mv.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, */
+/*                             INCX, BETA, Y, INCY ) */
+
+/*       REAL               ALPHA, BETA */
+/*       INTEGER            INCX, INCY, LDAB, M, N, KL, KU, TRANS */
+/*       REAL               AB( LDAB, * ), X( * ), Y( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLA_GBAMV  performs one of the matrix-vector operations */
+/* > */
+/* >         y := alpha*abs(A)*abs(x) + beta*abs(y), */
+/* >    or   y := alpha*abs(A)**T*abs(x) + beta*abs(y), */
+/* > */
+/* > where alpha and beta are scalars, x and y are vectors and A is an */
+/* > m by n matrix. */
+/* > */
+/* > This function is primarily used in calculating error bounds. */
+/* > To protect against underflow during evaluation, components in */
+/* > the resulting vector are perturbed away from zero by (N+1) */
+/* > times the underflow threshold.  To prevent unnecessarily large */
+/* > errors for block-structure embedded in general matrices, */
+/* > "symbolically" zero components are not perturbed.  A zero */
+/* > entry is considered "symbolic" if all multiplications involved */
+/* > in computing that entry have at least one zero multiplicand. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is INTEGER */
+/* >           On entry, TRANS specifies the operation to be performed as */
+/* >           follows: */
+/* > */
+/* >             BLAS_NO_TRANS      y := alpha*abs(A)*abs(x) + beta*abs(y) */
+/* >             BLAS_TRANS         y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
+/* >             BLAS_CONJ_TRANS    y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
+/* > */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >           On entry, M specifies the number of rows of the matrix A. */
+/* >           M must be at least zero. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >           On entry, N specifies the number of columns of the matrix A. */
+/* >           N must be at least zero. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >           The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >           The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ALPHA */
+/* > \verbatim */
+/* >          ALPHA is REAL */
+/* >           On entry, ALPHA specifies the scalar alpha. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is REAL array, dimension ( LDAB, n ) */
+/* >           Before entry, the leading m by n part of the array AB must */
+/* >           contain the matrix of coefficients. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >           On entry, LDA specifies the first dimension of AB as declared */
+/* >           in the calling (sub) program. LDAB must be at least */
+/* >           f2cmax( 1, m ). */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension */
+/* >           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
+/* >           and at least */
+/* >           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
+/* >           Before entry, the incremented array X must contain the */
+/* >           vector x. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] INCX */
+/* > \verbatim */
+/* >          INCX is INTEGER */
+/* >           On entry, INCX specifies the increment for the elements of */
+/* >           X. INCX must not be zero. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] BETA */
+/* > \verbatim */
+/* >          BETA is REAL */
+/* >           On entry, BETA specifies the scalar beta. When BETA is */
+/* >           supplied as zero then Y need not be set on input. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Y */
+/* > \verbatim */
+/* >          Y is REAL array, dimension */
+/* >           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
+/* >           and at least */
+/* >           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
+/* >           Before entry with BETA non-zero, the incremented array Y */
+/* >           must contain the vector y. On exit, Y is overwritten by the */
+/* >           updated vector y. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] INCY */
+/* > \verbatim */
+/* >          INCY is INTEGER */
+/* >           On entry, INCY specifies the increment for the elements of */
+/* >           Y. INCY must not be zero. */
+/* >           Unchanged on exit. */
+/* > */
+/* >  Level 2 Blas routine. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sla_gbamv_(integer *trans, integer *m, integer *n, 
+	integer *kl, integer *ku, real *alpha, real *ab, integer *ldab, real *
+	x, integer *incx, real *beta, real *y, integer *incy)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
+    real r__1;
+
+    /* Local variables */
+    integer info;
+    real temp;
+    integer lenx, leny;
+    extern integer ilatrans_(char *);
+    real safe1;
+    integer i__, j;
+    logical symb_zero__;
+    integer kd, ke, iy, jx, kx, ky;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! (*trans == ilatrans_("N") || *trans == ilatrans_("T") || *trans == ilatrans_("C"))) {
+	info = 1;
+    } else if (*m < 0) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*kl < 0 || *kl > *m - 1) {
+	info = 4;
+    } else if (*ku < 0 || *ku > *n - 1) {
+	info = 5;
+    } else if (*ldab < *kl + *ku + 1) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    } else if (*incy == 0) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("SLA_GBAMV ", &info, (ftnlen)10);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
+	return 0;
+    }
+
+/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
+/*     up the start points in  X  and  Y. */
+
+    if (*trans == ilatrans_("N")) {
+	lenx = *n;
+	leny = *m;
+    } else {
+	lenx = *m;
+	leny = *n;
+    }
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (lenx - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (leny - 1) * *incy;
+    }
+
+/*     Set SAFE1 essentially to be the underflow threshold times the */
+/*     number of additions in each row. */
+
+    safe1 = slamch_("Safe minimum");
+    safe1 = (*n + 1) * safe1;
+
+/*     Form  y := alpha*abs(A)*abs(x) + beta*abs(y). */
+
+/*     The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */
+/*     the inexact flag.  Still doesn't help change the iteration order */
+/*     to per-column. */
+
+    kd = *ku + 1;
+    ke = *kl + 1;
+    iy = ky;
+    if (*incx == 1) {
+	if (*trans == ilatrans_("N")) {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+/* Computing MAX */
+		    i__2 = i__ - *kl;
+/* Computing MIN */
+		    i__4 = i__ + *ku;
+		    i__3 = f2cmin(i__4,lenx);
+		    for (j = f2cmax(i__2,1); j <= i__3; ++j) {
+			temp = (r__1 = ab[kd + i__ - j + j * ab_dim1], abs(
+				r__1));
+			symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	} else {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+/* Computing MAX */
+		    i__3 = i__ - *kl;
+/* Computing MIN */
+		    i__4 = i__ + *ku;
+		    i__2 = f2cmin(i__4,lenx);
+		    for (j = f2cmax(i__3,1); j <= i__2; ++j) {
+			temp = (r__1 = ab[ke - i__ + j + i__ * ab_dim1], abs(
+				r__1));
+			symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	}
+    } else {
+	if (*trans == ilatrans_("N")) {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+		    jx = kx;
+/* Computing MAX */
+		    i__2 = i__ - *kl;
+/* Computing MIN */
+		    i__4 = i__ + *ku;
+		    i__3 = f2cmin(i__4,lenx);
+		    for (j = f2cmax(i__2,1); j <= i__3; ++j) {
+			temp = (r__1 = ab[kd + i__ - j + j * ab_dim1], abs(
+				r__1));
+			symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp;
+			jx += *incx;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	} else {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+		    jx = kx;
+/* Computing MAX */
+		    i__3 = i__ - *kl;
+/* Computing MIN */
+		    i__4 = i__ + *ku;
+		    i__2 = f2cmin(i__4,lenx);
+		    for (j = f2cmax(i__3,1); j <= i__2; ++j) {
+			temp = (r__1 = ab[ke - i__ + j + i__ * ab_dim1], abs(
+				r__1));
+			symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp;
+			jx += *incx;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SLA_GBAMV */
+
+} /* sla_gbamv__ */
+
diff --git a/lapack-netlib/SRC/sla_gbrcond.c b/lapack-netlib/SRC/sla_gbrcond.c
new file mode 100644
index 000000000..4cb9a4da6
--- /dev/null
+++ b/lapack-netlib/SRC/sla_gbrcond.c
@@ -0,0 +1,791 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SLA_GBRCOND estimates the Skeel condition number for a general banded matrix. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GBRCOND + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gbr
+cond.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gbr
+cond.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gbr
+cond.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       REAL FUNCTION SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, LDAFB, */
+/*                                  IPIV, CMODE, C, INFO, WORK, IWORK ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            N, LDAB, LDAFB, INFO, KL, KU, CMODE */
+/*       INTEGER            IWORK( * ), IPIV( * ) */
+/*       REAL               AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), */
+/*      $                   C( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    SLA_GBRCOND Estimates the Skeel condition number of  op(A) * op2(C) */
+/* >    where op2 is determined by CMODE as follows */
+/* >    CMODE =  1    op2(C) = C */
+/* >    CMODE =  0    op2(C) = I */
+/* >    CMODE = -1    op2(C) = inv(C) */
+/* >    The Skeel condition number  cond(A) = norminf( |inv(A)||A| ) */
+/* >    is computed by computing scaling factors R such that */
+/* >    diag(R)*A*op2(C) is row equilibrated and computing the standard */
+/* >    infinity-norm condition number. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate Transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >     The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >     The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is REAL array, dimension (LDAB,N) */
+/* >     On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
+/* >     The j-th column of A is stored in the j-th column of the */
+/* >     array AB as follows: */
+/* >     AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >     The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AFB */
+/* > \verbatim */
+/* >          AFB is REAL array, dimension (LDAFB,N) */
+/* >     Details of the LU factorization of the band matrix A, as */
+/* >     computed by SGBTRF.  U is stored as an upper triangular */
+/* >     band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */
+/* >     and the multipliers used during the factorization are stored */
+/* >     in rows KL+KU+2 to 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAFB */
+/* > \verbatim */
+/* >          LDAFB is INTEGER */
+/* >     The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     The pivot indices from the factorization A = P*L*U */
+/* >     as computed by SGBTRF; row i of the matrix was interchanged */
+/* >     with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] CMODE */
+/* > \verbatim */
+/* >          CMODE is INTEGER */
+/* >     Determines op2(C) in the formula op(A) * op2(C) as follows: */
+/* >     CMODE =  1    op2(C) = C */
+/* >     CMODE =  0    op2(C) = I */
+/* >     CMODE = -1    op2(C) = inv(C) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >     The vector C in the formula op(A) * op2(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. */
+/* >     i > 0:  The ith argument is invalid. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (5*N). */
+/* >     Workspace. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N). */
+/* >     Workspace. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGBcomputational */
+
+/*  ===================================================================== */
+real sla_gbrcond_(char *trans, integer *n, integer *kl, integer *ku, real *
+	ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, integer *
+	cmode, real *c__, integer *info, real *work, integer *iwork)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4;
+    real ret_val, r__1;
+
+    /* Local variables */
+    integer kase, i__, j;
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, 
+	    real *, integer *, integer *);
+    integer kd, ke;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    real ainvnm;
+    extern /* Subroutine */ int sgbtrs_(char *, integer *, integer *, integer 
+	    *, integer *, real *, integer *, integer *, real *, integer *, 
+	    integer *);
+    real tmp;
+    logical notrans;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    afb_dim1 = *ldafb;
+    afb_offset = 1 + afb_dim1 * 1;
+    afb -= afb_offset;
+    --ipiv;
+    --c__;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    ret_val = 0.f;
+
+    *info = 0;
+    notrans = lsame_(trans, "N");
+    if (! notrans && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0 || *kl > *n - 1) {
+	*info = -3;
+    } else if (*ku < 0 || *ku > *n - 1) {
+	*info = -4;
+    } else if (*ldab < *kl + *ku + 1) {
+	*info = -6;
+    } else if (*ldafb < (*kl << 1) + *ku + 1) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLA_GBRCOND", &i__1, (ftnlen)11);
+	return ret_val;
+    }
+    if (*n == 0) {
+	ret_val = 1.f;
+	return ret_val;
+    }
+
+/*     Compute the equilibration matrix R such that */
+/*     inv(R)*A*C has unit 1-norm. */
+
+    kd = *ku + 1;
+    ke = *kl + 1;
+    if (notrans) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    tmp = 0.f;
+	    if (*cmode == 1) {
+/* Computing MAX */
+		i__2 = i__ - *kl;
+/* Computing MIN */
+		i__4 = i__ + *ku;
+		i__3 = f2cmin(i__4,*n);
+		for (j = f2cmax(i__2,1); j <= i__3; ++j) {
+		    tmp += (r__1 = ab[kd + i__ - j + j * ab_dim1] * c__[j], 
+			    abs(r__1));
+		}
+	    } else if (*cmode == 0) {
+/* Computing MAX */
+		i__3 = i__ - *kl;
+/* Computing MIN */
+		i__4 = i__ + *ku;
+		i__2 = f2cmin(i__4,*n);
+		for (j = f2cmax(i__3,1); j <= i__2; ++j) {
+		    tmp += (r__1 = ab[kd + i__ - j + j * ab_dim1], abs(r__1));
+		}
+	    } else {
+/* Computing MAX */
+		i__2 = i__ - *kl;
+/* Computing MIN */
+		i__4 = i__ + *ku;
+		i__3 = f2cmin(i__4,*n);
+		for (j = f2cmax(i__2,1); j <= i__3; ++j) {
+		    tmp += (r__1 = ab[kd + i__ - j + j * ab_dim1] / c__[j], 
+			    abs(r__1));
+		}
+	    }
+	    work[(*n << 1) + i__] = tmp;
+	}
+    } else {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    tmp = 0.f;
+	    if (*cmode == 1) {
+/* Computing MAX */
+		i__3 = i__ - *kl;
+/* Computing MIN */
+		i__4 = i__ + *ku;
+		i__2 = f2cmin(i__4,*n);
+		for (j = f2cmax(i__3,1); j <= i__2; ++j) {
+		    tmp += (r__1 = ab[ke - i__ + j + i__ * ab_dim1] * c__[j], 
+			    abs(r__1));
+		}
+	    } else if (*cmode == 0) {
+/* Computing MAX */
+		i__2 = i__ - *kl;
+/* Computing MIN */
+		i__4 = i__ + *ku;
+		i__3 = f2cmin(i__4,*n);
+		for (j = f2cmax(i__2,1); j <= i__3; ++j) {
+		    tmp += (r__1 = ab[ke - i__ + j + i__ * ab_dim1], abs(r__1)
+			    );
+		}
+	    } else {
+/* Computing MAX */
+		i__3 = i__ - *kl;
+/* Computing MIN */
+		i__4 = i__ + *ku;
+		i__2 = f2cmin(i__4,*n);
+		for (j = f2cmax(i__3,1); j <= i__2; ++j) {
+		    tmp += (r__1 = ab[ke - i__ + j + i__ * ab_dim1] / c__[j], 
+			    abs(r__1));
+		}
+	    }
+	    work[(*n << 1) + i__] = tmp;
+	}
+    }
+
+/*     Estimate the norm of inv(op(A)). */
+
+    ainvnm = 0.f;
+    kase = 0;
+L10:
+    slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
+    if (kase != 0) {
+	if (kase == 2) {
+
+/*           Multiply by R. */
+
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] *= work[(*n << 1) + i__];
+	    }
+	    if (notrans) {
+		sgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], 
+			ldafb, &ipiv[1], &work[1], n, info);
+	    } else {
+		sgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], 
+			ldafb, &ipiv[1], &work[1], n, info);
+	    }
+
+/*           Multiply by inv(C). */
+
+	    if (*cmode == 1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] /= c__[i__];
+		}
+	    } else if (*cmode == -1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] *= c__[i__];
+		}
+	    }
+	} else {
+
+/*           Multiply by inv(C**T). */
+
+	    if (*cmode == 1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] /= c__[i__];
+		}
+	    } else if (*cmode == -1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] *= c__[i__];
+		}
+	    }
+	    if (notrans) {
+		sgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], 
+			ldafb, &ipiv[1], &work[1], n, info);
+	    } else {
+		sgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], 
+			ldafb, &ipiv[1], &work[1], n, info);
+	    }
+
+/*           Multiply by R. */
+
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] *= work[(*n << 1) + i__];
+	    }
+	}
+	goto L10;
+    }
+
+/*     Compute the estimate of the reciprocal condition number. */
+
+    if (ainvnm != 0.f) {
+	ret_val = 1.f / ainvnm;
+    }
+
+    return ret_val;
+
+} /* sla_gbrcond__ */
+
diff --git a/lapack-netlib/SRC/sla_gbrfsx_extended.c b/lapack-netlib/SRC/sla_gbrfsx_extended.c
new file mode 100644
index 000000000..7a4c06b85
--- /dev/null
+++ b/lapack-netlib/SRC/sla_gbrfsx_extended.c
@@ -0,0 +1,1135 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b6 = -1.f;
+static real c_b8 = 1.f;
+
+/* > \brief \b SLA_GBRFSX_EXTENDED improves the computed solution to a system of linear equations for general 
+banded matrices by performing extra-precise iterative refinement and provides error bounds and backwar
+d error estimates for the solution. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GBRFSX_EXTENDED + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gbr
+fsx_extended.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gbr
+fsx_extended.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gbr
+fsx_extended.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, */
+/*                                       NRHS, AB, LDAB, AFB, LDAFB, IPIV, */
+/*                                       COLEQU, C, B, LDB, Y, LDY, */
+/*                                       BERR_OUT, N_NORMS, ERR_BNDS_NORM, */
+/*                                       ERR_BNDS_COMP, RES, AYB, DY, */
+/*                                       Y_TAIL, RCOND, ITHRESH, RTHRESH, */
+/*                                       DZ_UB, IGNORE_CWISE, INFO ) */
+
+/*       INTEGER            INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, */
+/*      $                   PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH */
+/*       LOGICAL            COLEQU, IGNORE_CWISE */
+/*       REAL               RTHRESH, DZ_UB */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */
+/*      $                   Y( LDY, * ), RES(*), DY(*), Y_TAIL(*) */
+/*       REAL               C( * ), AYB(*), RCOND, BERR_OUT(*), */
+/*      $                   ERR_BNDS_NORM( NRHS, * ), */
+/*      $                   ERR_BNDS_COMP( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLA_GBRFSX_EXTENDED improves the computed solution to a system of */
+/* > linear equations by performing extra-precise iterative refinement */
+/* > and provides error bounds and backward error estimates for the solution. */
+/* > This subroutine is called by SGBRFSX to perform iterative refinement. */
+/* > In addition to normwise error bound, the code provides maximum */
+/* > componentwise error bound if possible. See comments for ERR_BNDS_NORM */
+/* > and ERR_BNDS_COMP for details of the error bounds. Note that this */
+/* > subroutine is only resonsible for setting the second fields of */
+/* > ERR_BNDS_NORM and ERR_BNDS_COMP. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] PREC_TYPE */
+/* > \verbatim */
+/* >          PREC_TYPE is INTEGER */
+/* >     Specifies the intermediate precision to be used in refinement. */
+/* >     The value is defined by ILAPREC(P) where P is a CHARACTER and P */
+/* >          = 'S':  Single */
+/* >          = 'D':  Double */
+/* >          = 'I':  Indigenous */
+/* >          = 'X' or 'E':  Extra */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS_TYPE */
+/* > \verbatim */
+/* >          TRANS_TYPE is INTEGER */
+/* >     Specifies the transposition operation on A. */
+/* >     The value is defined by ILATRANS(T) where T is a CHARACTER and T */
+/* >          = 'N':  No transpose */
+/* >          = 'T':  Transpose */
+/* >          = 'C':  Conjugate transpose */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >     The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >     The number of superdiagonals within the band of A.  KU >= 0 */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right-hand-sides, i.e., the number of columns of the */
+/* >     matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is REAL array, dimension (LDAB,N) */
+/* >     On entry, the N-by-N matrix AB. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >     The leading dimension of the array AB.  LDAB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AFB */
+/* > \verbatim */
+/* >          AFB is REAL array, dimension (LDAFB,N) */
+/* >     The factors L and U from the factorization */
+/* >     A = P*L*U as computed by SGBTRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAFB */
+/* > \verbatim */
+/* >          LDAFB is INTEGER */
+/* >     The leading dimension of the array AF.  LDAFB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     The pivot indices from the factorization A = P*L*U */
+/* >     as computed by SGBTRF; row i of the matrix was interchanged */
+/* >     with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COLEQU */
+/* > \verbatim */
+/* >          COLEQU is LOGICAL */
+/* >     If .TRUE. then column equilibration was done to A before calling */
+/* >     this routine. This is needed to compute the solution and error */
+/* >     bounds correctly. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >     The column scale factors for A. If COLEQU = .FALSE., C */
+/* >     is not accessed. If C is input, each element of C should be a power */
+/* >     of the radix to ensure a reliable solution and error estimates. */
+/* >     Scaling by powers of the radix does not cause rounding errors unless */
+/* >     the result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >     The right-hand-side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Y */
+/* > \verbatim */
+/* >          Y is REAL array, dimension (LDY,NRHS) */
+/* >     On entry, the solution matrix X, as computed by SGBTRS. */
+/* >     On exit, the improved solution matrix Y. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDY */
+/* > \verbatim */
+/* >          LDY is INTEGER */
+/* >     The leading dimension of the array Y.  LDY >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR_OUT */
+/* > \verbatim */
+/* >          BERR_OUT is REAL array, dimension (NRHS) */
+/* >     On exit, BERR_OUT(j) contains the componentwise relative backward */
+/* >     error for right-hand-side j from the formula */
+/* >         f2cmax(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
+/* >     where abs(Z) is the componentwise absolute value of the matrix */
+/* >     or vector Z. This is computed by SLA_LIN_BERR. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_NORMS */
+/* > \verbatim */
+/* >          N_NORMS is INTEGER */
+/* >     Determines which error bounds to return (see ERR_BNDS_NORM */
+/* >     and ERR_BNDS_COMP). */
+/* >     If N_NORMS >= 1 return normwise error bounds. */
+/* >     If N_NORMS >= 2 return componentwise error bounds. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] ERR_BNDS_NORM */
+/* > \verbatim */
+/* >          ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     This subroutine is only responsible for setting the second field */
+/* >     above. */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] ERR_BNDS_COMP */
+/* > \verbatim */
+/* >          ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     This subroutine is only responsible for setting the second field */
+/* >     above. */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RES */
+/* > \verbatim */
+/* >          RES is REAL array, dimension (N) */
+/* >     Workspace to hold the intermediate residual. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AYB */
+/* > \verbatim */
+/* >          AYB is REAL array, dimension (N) */
+/* >     Workspace. This can be the same workspace passed for Y_TAIL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DY */
+/* > \verbatim */
+/* >          DY is REAL array, dimension (N) */
+/* >     Workspace to hold the intermediate solution. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] Y_TAIL */
+/* > \verbatim */
+/* >          Y_TAIL is REAL array, dimension (N) */
+/* >     Workspace to hold the trailing bits of the intermediate solution. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ITHRESH */
+/* > \verbatim */
+/* >          ITHRESH is INTEGER */
+/* >     The maximum number of residual computations allowed for */
+/* >     refinement. The default is 10. For 'aggressive' set to 100 to */
+/* >     permit convergence using approximate factorizations or */
+/* >     factorizations other than LU. If the factorization uses a */
+/* >     technique other than Gaussian elimination, the guarantees in */
+/* >     ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RTHRESH */
+/* > \verbatim */
+/* >          RTHRESH is REAL */
+/* >     Determines when to stop refinement if the error estimate stops */
+/* >     decreasing. Refinement will stop when the next solution no longer */
+/* >     satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
+/* >     the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
+/* >     default value is 0.5. For 'aggressive' set to 0.9 to permit */
+/* >     convergence on extremely ill-conditioned matrices. See LAWN 165 */
+/* >     for more details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DZ_UB */
+/* > \verbatim */
+/* >          DZ_UB is REAL */
+/* >     Determines when to start considering componentwise convergence. */
+/* >     Componentwise convergence is only considered after each component */
+/* >     of the solution Y is stable, which we definte as the relative */
+/* >     change in each component being less than DZ_UB. The default value */
+/* >     is 0.25, requiring the first bit to be stable. See LAWN 165 for */
+/* >     more details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IGNORE_CWISE */
+/* > \verbatim */
+/* >          IGNORE_CWISE is LOGICAL */
+/* >     If .TRUE. then ignore componentwise convergence. Default value */
+/* >     is .FALSE.. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. */
+/* >       < 0:  if INFO = -i, the ith argument to SGBTRS had an illegal */
+/* >             value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sla_gbrfsx_extended_(integer *prec_type__, integer *
+	trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, 
+	real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, 
+	logical *colequ, real *c__, real *b, integer *ldb, real *y, integer *
+	ldy, real *berr_out__, integer *n_norms__, real *err_bnds_norm__, 
+	real *err_bnds_comp__, real *res, real *ayb, real *dy, real *y_tail__,
+	 real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical 
+	*ignore_cwise__, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
+	    y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, 
+	    err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3;
+    real r__1, r__2;
+    char ch__1[1];
+
+    /* Local variables */
+    real dx_x__, dz_z__, ymin;
+    extern /* Subroutine */ int sla_lin_berr_(integer *, integer *, integer *
+	    , real *, real *, real *), blas_sgbmv_x__(integer *, integer *, 
+	    integer *, integer *, integer *, real *, real *, integer *, real *
+	    , integer *, real *, real *, integer *, integer *);
+    real dxratmax, dzratmax;
+    integer y_prec_state__, i__, j, m;
+    extern /* Subroutine */ int blas_sgbmv2_x_(integer *, integer *, integer 
+	    *, integer *, integer *, real *, real *, integer *, real *, real *
+	    , integer *, real *, real *, integer *, integer *), sla_gbamv__(
+	    integer *, integer *, integer *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *), 
+	    sgbmv_(char *, integer *, integer *, integer *, integer *, real *,
+	     real *, integer *, real *, integer *, real *, real *, integer *);
+    real dxrat;
+    logical incr_prec__;
+    real dzrat;
+    char trans[1];
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    real normx, normy;
+    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
+	    real *, integer *);
+    real myhugeval, prev_dz_z__, yk;
+    extern real slamch_(char *);
+    real final_dx_x__, final_dz_z__;
+    extern /* Subroutine */ int sgbtrs_(char *, integer *, integer *, integer 
+	    *, integer *, real *, integer *, integer *, real *, integer *, 
+	    integer *);
+    real normdx;
+    extern /* Subroutine */ int sla_wwaddw_(integer *, real *, real *, real *
+	    );
+    extern /* Character */ VOID chla_transtype_(char *, integer *);
+    real prevnormdx;
+    integer cnt;
+    real dyk, eps;
+    integer x_state__, z_state__;
+    real incr_thresh__;
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    afb_dim1 = *ldafb;
+    afb_offset = 1 + afb_dim1 * 1;
+    afb -= afb_offset;
+    --ipiv;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --berr_out__;
+    --res;
+    --ayb;
+    --dy;
+    --y_tail__;
+
+    /* Function Body */
+    if (*info != 0) {
+	return 0;
+    }
+    chla_transtype_(ch__1, trans_type__);
+    *(unsigned char *)trans = *(unsigned char *)&ch__1[0];
+    eps = slamch_("Epsilon");
+    myhugeval = slamch_("Overflow");
+/*     Force MYHUGEVAL to Inf */
+    myhugeval *= myhugeval;
+/*     Using MYHUGEVAL may lead to spurious underflows. */
+    incr_thresh__ = (real) (*n) * eps;
+    m = *kl + *ku + 1;
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	y_prec_state__ = 1;
+	if (y_prec_state__ == 2) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		y_tail__[i__] = 0.f;
+	    }
+	}
+	dxrat = 0.f;
+	dxratmax = 0.f;
+	dzrat = 0.f;
+	dzratmax = 0.f;
+	final_dx_x__ = myhugeval;
+	final_dz_z__ = myhugeval;
+	prevnormdx = myhugeval;
+	prev_dz_z__ = myhugeval;
+	dz_z__ = myhugeval;
+	dx_x__ = myhugeval;
+	x_state__ = 1;
+	z_state__ = 0;
+	incr_prec__ = FALSE_;
+	i__2 = *ithresh;
+	for (cnt = 1; cnt <= i__2; ++cnt) {
+
+/*        Compute residual RES = B_s - op(A_s) * Y, */
+/*            op(A) = A, A**T, or A**H depending on TRANS (and type). */
+
+	    scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
+	    if (y_prec_state__ == 0) {
+		sgbmv_(trans, &m, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[
+			j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1);
+	    } else if (y_prec_state__ == 1) {
+		blas_sgbmv_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[
+			ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, &
+			res[1], &c__1, prec_type__);
+	    } else {
+		blas_sgbmv2_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[
+			ab_offset], ldab, &y[j * y_dim1 + 1], &y_tail__[1], &
+			c__1, &c_b8, &res[1], &c__1, prec_type__);
+	    }
+/*        XXX: RES is no longer needed. */
+	    scopy_(n, &res[1], &c__1, &dy[1], &c__1);
+	    sgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1]
+		    , &dy[1], n, info);
+
+/*         Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */
+
+	    normx = 0.f;
+	    normy = 0.f;
+	    normdx = 0.f;
+	    dz_z__ = 0.f;
+	    ymin = myhugeval;
+	    i__3 = *n;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		yk = (r__1 = y[i__ + j * y_dim1], abs(r__1));
+		dyk = (r__1 = dy[i__], abs(r__1));
+		if (yk != 0.f) {
+/* Computing MAX */
+		    r__1 = dz_z__, r__2 = dyk / yk;
+		    dz_z__ = f2cmax(r__1,r__2);
+		} else if (dyk != 0.f) {
+		    dz_z__ = myhugeval;
+		}
+		ymin = f2cmin(ymin,yk);
+		normy = f2cmax(normy,yk);
+		if (*colequ) {
+/* Computing MAX */
+		    r__1 = normx, r__2 = yk * c__[i__];
+		    normx = f2cmax(r__1,r__2);
+/* Computing MAX */
+		    r__1 = normdx, r__2 = dyk * c__[i__];
+		    normdx = f2cmax(r__1,r__2);
+		} else {
+		    normx = normy;
+		    normdx = f2cmax(normdx,dyk);
+		}
+	    }
+	    if (normx != 0.f) {
+		dx_x__ = normdx / normx;
+	    } else if (normdx == 0.f) {
+		dx_x__ = 0.f;
+	    } else {
+		dx_x__ = myhugeval;
+	    }
+	    dxrat = normdx / prevnormdx;
+	    dzrat = dz_z__ / prev_dz_z__;
+
+/*         Check termination criteria. */
+
+	    if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy 
+		    && y_prec_state__ < 2) {
+		incr_prec__ = TRUE_;
+	    }
+	    if (x_state__ == 3 && dxrat <= *rthresh) {
+		x_state__ = 1;
+	    }
+	    if (x_state__ == 1) {
+		if (dx_x__ <= eps) {
+		    x_state__ = 2;
+		} else if (dxrat > *rthresh) {
+		    if (y_prec_state__ != 2) {
+			incr_prec__ = TRUE_;
+		    } else {
+			x_state__ = 3;
+		    }
+		} else {
+		    if (dxrat > dxratmax) {
+			dxratmax = dxrat;
+		    }
+		}
+		if (x_state__ > 1) {
+		    final_dx_x__ = dx_x__;
+		}
+	    }
+	    if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
+		z_state__ = 1;
+	    }
+	    if (z_state__ == 3 && dzrat <= *rthresh) {
+		z_state__ = 1;
+	    }
+	    if (z_state__ == 1) {
+		if (dz_z__ <= eps) {
+		    z_state__ = 2;
+		} else if (dz_z__ > *dz_ub__) {
+		    z_state__ = 0;
+		    dzratmax = 0.f;
+		    final_dz_z__ = myhugeval;
+		} else if (dzrat > *rthresh) {
+		    if (y_prec_state__ != 2) {
+			incr_prec__ = TRUE_;
+		    } else {
+			z_state__ = 3;
+		    }
+		} else {
+		    if (dzrat > dzratmax) {
+			dzratmax = dzrat;
+		    }
+		}
+		if (z_state__ > 1) {
+		    final_dz_z__ = dz_z__;
+		}
+	    }
+
+/*           Exit if both normwise and componentwise stopped working, */
+/*           but if componentwise is unstable, let it go at least two */
+/*           iterations. */
+
+	    if (x_state__ != 1) {
+		if (*ignore_cwise__) {
+		    goto L666;
+		}
+		if (z_state__ == 3 || z_state__ == 2) {
+		    goto L666;
+		}
+		if (z_state__ == 0 && cnt > 1) {
+		    goto L666;
+		}
+	    }
+	    if (incr_prec__) {
+		incr_prec__ = FALSE_;
+		++y_prec_state__;
+		i__3 = *n;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    y_tail__[i__] = 0.f;
+		}
+	    }
+	    prevnormdx = normdx;
+	    prev_dz_z__ = dz_z__;
+
+/*           Update soluton. */
+
+	    if (y_prec_state__ < 2) {
+		saxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
+	    } else {
+		sla_wwaddw_(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
+	    }
+	}
+/*        Target of "IF (Z_STOP .AND. X_STOP)".  Sun's f77 won't CALL MYEXIT. */
+L666:
+
+/*     Set final_* when cnt hits ithresh. */
+
+	if (x_state__ == 1) {
+	    final_dx_x__ = dx_x__;
+	}
+	if (z_state__ == 1) {
+	    final_dz_z__ = dz_z__;
+	}
+
+/*     Compute error bounds. */
+
+	if (*n_norms__ >= 1) {
+	    err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / (
+		    1 - dxratmax);
+	}
+	if (*n_norms__ >= 2) {
+	    err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / (
+		    1 - dzratmax);
+	}
+
+/*     Compute componentwise relative backward error from formula */
+/*         f2cmax(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
+/*     where abs(Z) is the componentwise absolute value of the matrix */
+/*     or vector Z. */
+
+/*        Compute residual RES = B_s - op(A_s) * Y, */
+/*            op(A) = A, A**T, or A**H depending on TRANS (and type). */
+
+	scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
+	sgbmv_(trans, n, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[j * 
+		y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1);
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    ayb[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1));
+	}
+
+/*     Compute abs(op(A_s))*abs(Y) + abs(B_s). */
+
+	sla_gbamv_(trans_type__, n, n, kl, ku, &c_b8, &ab[ab_offset], ldab, &
+		y[j * y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1);
+	sla_lin_berr_(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);
+
+/*     End of loop for each RHS */
+
+    }
+
+    return 0;
+} /* sla_gbrfsx_extended__ */
+
diff --git a/lapack-netlib/SRC/sla_gbrpvgrw.c b/lapack-netlib/SRC/sla_gbrpvgrw.c
new file mode 100644
index 000000000..11debe424
--- /dev/null
+++ b/lapack-netlib/SRC/sla_gbrpvgrw.c
@@ -0,0 +1,569 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SLA_GBRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a general banded m
+atrix. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GBRPVGRW + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gbr
+pvgrw.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gbr
+pvgrw.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gbr
+pvgrw.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       REAL FUNCTION SLA_GBRPVGRW( N, KL, KU, NCOLS, AB, LDAB, AFB, */
+/*                                   LDAFB ) */
+
+/*       INTEGER            N, KL, KU, NCOLS, LDAB, LDAFB */
+/*       REAL               AB( LDAB, * ), AFB( LDAFB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLA_GBRPVGRW computes the reciprocal pivot growth factor */
+/* > norm(A)/norm(U). The "f2cmax absolute element" norm is used. If this is */
+/* > much less than 1, the stability of the LU factorization of the */
+/* > (equilibrated) matrix A could be poor. This also means that the */
+/* > solution X, estimated condition numbers, and error bounds could be */
+/* > unreliable. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >     The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >     The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NCOLS */
+/* > \verbatim */
+/* >          NCOLS is INTEGER */
+/* >     The number of columns of the matrix A.  NCOLS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is REAL array, dimension (LDAB,N) */
+/* >     On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
+/* >     The j-th column of A is stored in the j-th column of the */
+/* >     array AB as follows: */
+/* >     AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >     The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AFB */
+/* > \verbatim */
+/* >          AFB is REAL array, dimension (LDAFB,N) */
+/* >     Details of the LU factorization of the band matrix A, as */
+/* >     computed by SGBTRF.  U is stored as an upper triangular */
+/* >     band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */
+/* >     and the multipliers used during the factorization are stored */
+/* >     in rows KL+KU+2 to 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAFB */
+/* > \verbatim */
+/* >          LDAFB is INTEGER */
+/* >     The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGBcomputational */
+
+/*  ===================================================================== */
+real sla_gbrpvgrw_(integer *n, integer *kl, integer *ku, integer *ncols, 
+	real *ab, integer *ldab, real *afb, integer *ldafb)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4;
+    real ret_val, r__1, r__2;
+
+    /* Local variables */
+    real amax, umax;
+    integer i__, j, kd;
+    real rpvgrw;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    afb_dim1 = *ldafb;
+    afb_offset = 1 + afb_dim1 * 1;
+    afb -= afb_offset;
+
+    /* Function Body */
+    rpvgrw = 1.f;
+    kd = *ku + 1;
+    i__1 = *ncols;
+    for (j = 1; j <= i__1; ++j) {
+	amax = 0.f;
+	umax = 0.f;
+/* Computing MAX */
+	i__2 = j - *ku;
+/* Computing MIN */
+	i__4 = j + *kl;
+	i__3 = f2cmin(i__4,*n);
+	for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) {
+/* Computing MAX */
+	    r__2 = (r__1 = ab[kd + i__ - j + j * ab_dim1], abs(r__1));
+	    amax = f2cmax(r__2,amax);
+	}
+/* Computing MAX */
+	i__3 = j - *ku;
+	i__2 = j;
+	for (i__ = f2cmax(i__3,1); i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = (r__1 = afb[kd + i__ - j + j * afb_dim1], abs(r__1));
+	    umax = f2cmax(r__2,umax);
+	}
+	if (umax != 0.f) {
+/* Computing MIN */
+	    r__1 = amax / umax;
+	    rpvgrw = f2cmin(r__1,rpvgrw);
+	}
+    }
+    ret_val = rpvgrw;
+    return ret_val;
+} /* sla_gbrpvgrw__ */
+
diff --git a/lapack-netlib/SRC/sla_geamv.c b/lapack-netlib/SRC/sla_geamv.c
new file mode 100644
index 000000000..e841a7ad2
--- /dev/null
+++ b/lapack-netlib/SRC/sla_geamv.c
@@ -0,0 +1,779 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SLA_GEAMV computes a matrix-vector product using a general matrix to calculate error bounds. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GEAMV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_gea
+mv.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_gea
+mv.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_gea
+mv.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, */
+/*                              Y, INCY ) */
+
+/*       REAL               ALPHA, BETA */
+/*       INTEGER            INCX, INCY, LDA, M, N, TRANS */
+/*       REAL               A( LDA, * ), X( * ), Y( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLA_GEAMV  performs one of the matrix-vector operations */
+/* > */
+/* >         y := alpha*abs(A)*abs(x) + beta*abs(y), */
+/* >    or   y := alpha*abs(A)**T*abs(x) + beta*abs(y), */
+/* > */
+/* > where alpha and beta are scalars, x and y are vectors and A is an */
+/* > m by n matrix. */
+/* > */
+/* > This function is primarily used in calculating error bounds. */
+/* > To protect against underflow during evaluation, components in */
+/* > the resulting vector are perturbed away from zero by (N+1) */
+/* > times the underflow threshold.  To prevent unnecessarily large */
+/* > errors for block-structure embedded in general matrices, */
+/* > "symbolically" zero components are not perturbed.  A zero */
+/* > entry is considered "symbolic" if all multiplications involved */
+/* > in computing that entry have at least one zero multiplicand. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is INTEGER */
+/* >           On entry, TRANS specifies the operation to be performed as */
+/* >           follows: */
+/* > */
+/* >             BLAS_NO_TRANS      y := alpha*abs(A)*abs(x) + beta*abs(y) */
+/* >             BLAS_TRANS         y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
+/* >             BLAS_CONJ_TRANS    y := alpha*abs(A**T)*abs(x) + beta*abs(y) */
+/* > */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >           On entry, M specifies the number of rows of the matrix A. */
+/* >           M must be at least zero. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >           On entry, N specifies the number of columns of the matrix A. */
+/* >           N must be at least zero. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ALPHA */
+/* > \verbatim */
+/* >          ALPHA is REAL */
+/* >           On entry, ALPHA specifies the scalar alpha. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension ( LDA, n ) */
+/* >           Before entry, the leading m by n part of the array A must */
+/* >           contain the matrix of coefficients. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >           On entry, LDA specifies the first dimension of A as declared */
+/* >           in the calling (sub) program. LDA must be at least */
+/* >           f2cmax( 1, m ). */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] X */
+/* > \verbatim */
+/* >          X is REAL array, dimension */
+/* >           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
+/* >           and at least */
+/* >           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
+/* >           Before entry, the incremented array X must contain the */
+/* >           vector x. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] INCX */
+/* > \verbatim */
+/* >          INCX is INTEGER */
+/* >           On entry, INCX specifies the increment for the elements of */
+/* >           X. INCX must not be zero. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] BETA */
+/* > \verbatim */
+/* >          BETA is REAL */
+/* >           On entry, BETA specifies the scalar beta. When BETA is */
+/* >           supplied as zero then Y need not be set on input. */
+/* >           Unchanged on exit. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Y */
+/* > \verbatim */
+/* >          Y is REAL array, */
+/* >           dimension at least */
+/* >           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
+/* >           and at least */
+/* >           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
+/* >           Before entry with BETA non-zero, the incremented array Y */
+/* >           must contain the vector y. On exit, Y is overwritten by the */
+/* >           updated vector y. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] INCY */
+/* > \verbatim */
+/* >          INCY is INTEGER */
+/* >           On entry, INCY specifies the increment for the elements of */
+/* >           Y. INCY must not be zero. */
+/* >           Unchanged on exit. */
+/* > */
+/* >  Level 2 Blas routine. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sla_geamv_(integer *trans, integer *m, integer *n, real 
+	*alpha, real *a, integer *lda, real *x, integer *incx, real *beta, 
+	real *y, integer *incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    integer info;
+    real temp;
+    integer lenx, leny;
+    extern integer ilatrans_(char *);
+    real safe1;
+    integer i__, j;
+    logical symb_zero__;
+    integer iy, jx, kx, ky;
+    extern real slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! (*trans == ilatrans_("N") || *trans == ilatrans_("T") || *trans == ilatrans_("C"))) {
+	info = 1;
+    } else if (*m < 0) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*lda < f2cmax(1,*m)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    } else if (*incy == 0) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("SLA_GEAMV ", &info, (ftnlen)10);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
+	return 0;
+    }
+
+/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
+/*     up the start points in  X  and  Y. */
+
+    if (*trans == ilatrans_("N")) {
+	lenx = *n;
+	leny = *m;
+    } else {
+	lenx = *m;
+	leny = *n;
+    }
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (lenx - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (leny - 1) * *incy;
+    }
+
+/*     Set SAFE1 essentially to be the underflow threshold times the */
+/*     number of additions in each row. */
+
+    safe1 = slamch_("Safe minimum");
+    safe1 = (*n + 1) * safe1;
+
+/*     Form  y := alpha*abs(A)*abs(x) + beta*abs(y). */
+
+/*     The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */
+/*     the inexact flag.  Still doesn't help change the iteration order */
+/*     to per-column. */
+
+    iy = ky;
+    if (*incx == 1) {
+	if (*trans == ilatrans_("N")) {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+		    i__2 = lenx;
+		    for (j = 1; j <= i__2; ++j) {
+			temp = (r__1 = a[i__ + j * a_dim1], abs(r__1));
+			symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	} else {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+		    i__2 = lenx;
+		    for (j = 1; j <= i__2; ++j) {
+			temp = (r__1 = a[j + i__ * a_dim1], abs(r__1));
+			symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	}
+    } else {
+	if (*trans == ilatrans_("N")) {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+		    jx = kx;
+		    i__2 = lenx;
+		    for (j = 1; j <= i__2; ++j) {
+			temp = (r__1 = a[i__ + j * a_dim1], abs(r__1));
+			symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp;
+			jx += *incx;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	} else {
+	    i__1 = leny;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (*beta == 0.f) {
+		    symb_zero__ = TRUE_;
+		    y[iy] = 0.f;
+		} else if (y[iy] == 0.f) {
+		    symb_zero__ = TRUE_;
+		} else {
+		    symb_zero__ = FALSE_;
+		    y[iy] = *beta * (r__1 = y[iy], abs(r__1));
+		}
+		if (*alpha != 0.f) {
+		    jx = kx;
+		    i__2 = lenx;
+		    for (j = 1; j <= i__2; ++j) {
+			temp = (r__1 = a[j + i__ * a_dim1], abs(r__1));
+			symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == 
+				0.f);
+			y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp;
+			jx += *incx;
+		    }
+		}
+		if (! symb_zero__) {
+		    y[iy] += r_sign(&safe1, &y[iy]);
+		}
+		iy += *incy;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SLA_GEAMV */
+
+} /* sla_geamv__ */
+
diff --git a/lapack-netlib/SRC/sla_gercond.c b/lapack-netlib/SRC/sla_gercond.c
new file mode 100644
index 000000000..1ce7fc670
--- /dev/null
+++ b/lapack-netlib/SRC/sla_gercond.c
@@ -0,0 +1,735 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b SLA_GERCOND estimates the Skeel condition number for a general matrix. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GERCOND + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_ger
+cond.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_ger
+cond.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_ger
+cond.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       REAL FUNCTION SLA_GERCOND ( TRANS, N, A, LDA, AF, LDAF, IPIV, */
+/*                                   CMODE, C, INFO, WORK, IWORK ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            N, LDA, LDAF, INFO, CMODE */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       REAL               A( LDA, * ), AF( LDAF, * ), WORK( * ), */
+/*      $                   C( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    SLA_GERCOND estimates the Skeel condition number of op(A) * op2(C) */
+/* >    where op2 is determined by CMODE as follows */
+/* >    CMODE =  1    op2(C) = C */
+/* >    CMODE =  0    op2(C) = I */
+/* >    CMODE = -1    op2(C) = inv(C) */
+/* >    The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */
+/* >    is computed by computing scaling factors R such that */
+/* >    diag(R)*A*op2(C) is row equilibrated and computing the standard */
+/* >    infinity-norm condition number. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate Transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >     On entry, the N-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >     The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AF */
+/* > \verbatim */
+/* >          AF is REAL array, dimension (LDAF,N) */
+/* >     The factors L and U from the factorization */
+/* >     A = P*L*U as computed by SGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >     The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     The pivot indices from the factorization A = P*L*U */
+/* >     as computed by SGETRF; row i of the matrix was interchanged */
+/* >     with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] CMODE */
+/* > \verbatim */
+/* >          CMODE is INTEGER */
+/* >     Determines op2(C) in the formula op(A) * op2(C) as follows: */
+/* >     CMODE =  1    op2(C) = C */
+/* >     CMODE =  0    op2(C) = I */
+/* >     CMODE = -1    op2(C) = inv(C) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >     The vector C in the formula op(A) * op2(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. */
+/* >     i > 0:  The ith argument is invalid. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is REAL array, dimension (3*N). */
+/* >     Workspace. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N). */
+/* >     Workspace.2 */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+real sla_gercond_(char *trans, integer *n, real *a, integer *lda, real *af, 
+	integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer *
+	info, real *work, integer *iwork)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2;
+    real ret_val, r__1;
+
+    /* Local variables */
+    integer kase, i__, j;
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, 
+	    real *, integer *, integer *), xerbla_(char *, integer *, ftnlen);
+    real ainvnm;
+    extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
+	    integer *, integer *, real *, integer *, integer *);
+    real tmp;
+    logical notrans;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --c__;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    ret_val = 0.f;
+
+    *info = 0;
+    notrans = lsame_(trans, "N");
+    if (! notrans && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLA_GERCOND", &i__1, (ftnlen)11);
+	return ret_val;
+    }
+    if (*n == 0) {
+	ret_val = 1.f;
+	return ret_val;
+    }
+
+/*     Compute the equilibration matrix R such that */
+/*     inv(R)*A*C has unit 1-norm. */
+
+    if (notrans) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    tmp = 0.f;
+	    if (*cmode == 1) {
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    tmp += (r__1 = a[i__ + j * a_dim1] * c__[j], abs(r__1));
+		}
+	    } else if (*cmode == 0) {
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    tmp += (r__1 = a[i__ + j * a_dim1], abs(r__1));
+		}
+	    } else {
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    tmp += (r__1 = a[i__ + j * a_dim1] / c__[j], abs(r__1));
+		}
+	    }
+	    work[(*n << 1) + i__] = tmp;
+	}
+    } else {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    tmp = 0.f;
+	    if (*cmode == 1) {
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    tmp += (r__1 = a[j + i__ * a_dim1] * c__[j], abs(r__1));
+		}
+	    } else if (*cmode == 0) {
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    tmp += (r__1 = a[j + i__ * a_dim1], abs(r__1));
+		}
+	    } else {
+		i__2 = *n;
+		for (j = 1; j <= i__2; ++j) {
+		    tmp += (r__1 = a[j + i__ * a_dim1] / c__[j], abs(r__1));
+		}
+	    }
+	    work[(*n << 1) + i__] = tmp;
+	}
+    }
+
+/*     Estimate the norm of inv(op(A)). */
+
+    ainvnm = 0.f;
+    kase = 0;
+L10:
+    slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
+    if (kase != 0) {
+	if (kase == 2) {
+
+/*           Multiply by R. */
+
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] *= work[(*n << 1) + i__];
+	    }
+	    if (notrans) {
+		sgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[
+			1], &work[1], n, info);
+	    } else {
+		sgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1],
+			 &work[1], n, info);
+	    }
+
+/*           Multiply by inv(C). */
+
+	    if (*cmode == 1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] /= c__[i__];
+		}
+	    } else if (*cmode == -1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] *= c__[i__];
+		}
+	    }
+	} else {
+
+/*           Multiply by inv(C**T). */
+
+	    if (*cmode == 1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] /= c__[i__];
+		}
+	    } else if (*cmode == -1) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    work[i__] *= c__[i__];
+		}
+	    }
+	    if (notrans) {
+		sgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1],
+			 &work[1], n, info);
+	    } else {
+		sgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[
+			1], &work[1], n, info);
+	    }
+
+/*           Multiply by R. */
+
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] *= work[(*n << 1) + i__];
+	    }
+	}
+	goto L10;
+    }
+
+/*     Compute the estimate of the reciprocal condition number. */
+
+    if (ainvnm != 0.f) {
+	ret_val = 1.f / ainvnm;
+    }
+
+    return ret_val;
+
+} /* sla_gercond__ */
+
diff --git a/lapack-netlib/SRC/sla_gerfsx_extended.c b/lapack-netlib/SRC/sla_gerfsx_extended.c
new file mode 100644
index 000000000..c1446ca91
--- /dev/null
+++ b/lapack-netlib/SRC/sla_gerfsx_extended.c
@@ -0,0 +1,1119 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b6 = -1.f;
+static real c_b8 = 1.f;
+
+/* > \brief \b SLA_GERFSX_EXTENDED improves the computed solution to a system of linear equations for general 
+matrices by performing extra-precise iterative refinement and provides error bounds and backward error
+ estimates for the solution. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GERFSX_EXTENDED + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_ger
+fsx_extended.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_ger
+fsx_extended.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_ger
+fsx_extended.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE SLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, */
+/*                                       LDA, AF, LDAF, IPIV, COLEQU, C, B, */
+/*                                       LDB, Y, LDY, BERR_OUT, N_NORMS, */
+/*                                       ERRS_N, ERRS_C, RES, */
+/*                                       AYB, DY, Y_TAIL, RCOND, ITHRESH, */
+/*                                       RTHRESH, DZ_UB, IGNORE_CWISE, */
+/*                                       INFO ) */
+
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, */
+/*      $                   TRANS_TYPE, N_NORMS, ITHRESH */
+/*       LOGICAL            COLEQU, IGNORE_CWISE */
+/*       REAL               RTHRESH, DZ_UB */
+/*       INTEGER            IPIV( * ) */
+/*       REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) */
+/*       REAL               C( * ), AYB( * ), RCOND, BERR_OUT( * ), */
+/*      $                   ERRS_N( NRHS, * ), */
+/*      $                   ERRS_C( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLA_GERFSX_EXTENDED improves the computed solution to a system of */
+/* > linear equations by performing extra-precise iterative refinement */
+/* > and provides error bounds and backward error estimates for the solution. */
+/* > This subroutine is called by SGERFSX to perform iterative refinement. */
+/* > In addition to normwise error bound, the code provides maximum */
+/* > componentwise error bound if possible. See comments for ERRS_N */
+/* > and ERRS_C for details of the error bounds. Note that this */
+/* > subroutine is only resonsible for setting the second fields of */
+/* > ERRS_N and ERRS_C. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] PREC_TYPE */
+/* > \verbatim */
+/* >          PREC_TYPE is INTEGER */
+/* >     Specifies the intermediate precision to be used in refinement. */
+/* >     The value is defined by ILAPREC(P) where P is a CHARACTER and P */
+/* >          = 'S':  Single */
+/* >          = 'D':  Double */
+/* >          = 'I':  Indigenous */
+/* >          = 'X' or 'E':  Extra */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS_TYPE */
+/* > \verbatim */
+/* >          TRANS_TYPE is INTEGER */
+/* >     Specifies the transposition operation on A. */
+/* >     The value is defined by ILATRANS(T) where T is a CHARACTER and T */
+/* >          = 'N':  No transpose */
+/* >          = 'T':  Transpose */
+/* >          = 'C':  Conjugate transpose */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right-hand-sides, i.e., the number of columns of the */
+/* >     matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >     On entry, the N-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >     The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AF */
+/* > \verbatim */
+/* >          AF is REAL array, dimension (LDAF,N) */
+/* >     The factors L and U from the factorization */
+/* >     A = P*L*U as computed by SGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >     The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     The pivot indices from the factorization A = P*L*U */
+/* >     as computed by SGETRF; row i of the matrix was interchanged */
+/* >     with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COLEQU */
+/* > \verbatim */
+/* >          COLEQU is LOGICAL */
+/* >     If .TRUE. then column equilibration was done to A before calling */
+/* >     this routine. This is needed to compute the solution and error */
+/* >     bounds correctly. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] C */
+/* > \verbatim */
+/* >          C is REAL array, dimension (N) */
+/* >     The column scale factors for A. If COLEQU = .FALSE., C */
+/* >     is not accessed. If C is input, each element of C should be a power */
+/* >     of the radix to ensure a reliable solution and error estimates. */
+/* >     Scaling by powers of the radix does not cause rounding errors unless */
+/* >     the result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is REAL array, dimension (LDB,NRHS) */
+/* >     The right-hand-side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] Y */
+/* > \verbatim */
+/* >          Y is REAL array, dimension (LDY,NRHS) */
+/* >     On entry, the solution matrix X, as computed by SGETRS. */
+/* >     On exit, the improved solution matrix Y. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDY */
+/* > \verbatim */
+/* >          LDY is INTEGER */
+/* >     The leading dimension of the array Y.  LDY >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR_OUT */
+/* > \verbatim */
+/* >          BERR_OUT is REAL array, dimension (NRHS) */
+/* >     On exit, BERR_OUT(j) contains the componentwise relative backward */
+/* >     error for right-hand-side j from the formula */
+/* >         f2cmax(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
+/* >     where abs(Z) is the componentwise absolute value of the matrix */
+/* >     or vector Z. This is computed by SLA_LIN_BERR. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_NORMS */
+/* > \verbatim */
+/* >          N_NORMS is INTEGER */
+/* >     Determines which error bounds to return (see ERRS_N */
+/* >     and ERRS_C). */
+/* >     If N_NORMS >= 1 return normwise error bounds. */
+/* >     If N_NORMS >= 2 return componentwise error bounds. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] ERRS_N */
+/* > \verbatim */
+/* >          ERRS_N is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERRS_N(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERRS_N(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     This subroutine is only responsible for setting the second field */
+/* >     above. */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] ERRS_C */
+/* > \verbatim */
+/* >          ERRS_C is REAL array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERRS_C is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERRS_C(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERRS_C(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is less than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * slamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * slamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     This subroutine is only responsible for setting the second field */
+/* >     above. */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RES */
+/* > \verbatim */
+/* >          RES is REAL array, dimension (N) */
+/* >     Workspace to hold the intermediate residual. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AYB */
+/* > \verbatim */
+/* >          AYB is REAL array, dimension (N) */
+/* >     Workspace. This can be the same workspace passed for Y_TAIL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DY */
+/* > \verbatim */
+/* >          DY is REAL array, dimension (N) */
+/* >     Workspace to hold the intermediate solution. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] Y_TAIL */
+/* > \verbatim */
+/* >          Y_TAIL is REAL array, dimension (N) */
+/* >     Workspace to hold the trailing bits of the intermediate solution. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RCOND */
+/* > \verbatim */
+/* >          RCOND is REAL */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ITHRESH */
+/* > \verbatim */
+/* >          ITHRESH is INTEGER */
+/* >     The maximum number of residual computations allowed for */
+/* >     refinement. The default is 10. For 'aggressive' set to 100 to */
+/* >     permit convergence using approximate factorizations or */
+/* >     factorizations other than LU. If the factorization uses a */
+/* >     technique other than Gaussian elimination, the guarantees in */
+/* >     ERRS_N and ERRS_C may no longer be trustworthy. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RTHRESH */
+/* > \verbatim */
+/* >          RTHRESH is REAL */
+/* >     Determines when to stop refinement if the error estimate stops */
+/* >     decreasing. Refinement will stop when the next solution no longer */
+/* >     satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
+/* >     the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
+/* >     default value is 0.5. For 'aggressive' set to 0.9 to permit */
+/* >     convergence on extremely ill-conditioned matrices. See LAWN 165 */
+/* >     for more details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] DZ_UB */
+/* > \verbatim */
+/* >          DZ_UB is REAL */
+/* >     Determines when to start considering componentwise convergence. */
+/* >     Componentwise convergence is only considered after each component */
+/* >     of the solution Y is stable, which we definte as the relative */
+/* >     change in each component being less than DZ_UB. The default value */
+/* >     is 0.25, requiring the first bit to be stable. See LAWN 165 for */
+/* >     more details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IGNORE_CWISE */
+/* > \verbatim */
+/* >          IGNORE_CWISE is LOGICAL */
+/* >     If .TRUE. then ignore componentwise convergence. Default value */
+/* >     is .FALSE.. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. */
+/* >       < 0:  if INFO = -i, the ith argument to SGETRS had an illegal */
+/* >             value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int sla_gerfsx_extended_(integer *prec_type__, integer *
+	trans_type__, integer *n, integer *nrhs, real *a, integer *lda, real *
+	af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, real *b,
+	 integer *ldb, real *y, integer *ldy, real *berr_out__, integer *
+	n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real 
+	*dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, 
+	real *dz_ub__, logical *ignore_cwise__, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, 
+	    y_offset, errs_n_dim1, errs_n_offset, errs_c_dim1, errs_c_offset, 
+	    i__1, i__2, i__3;
+    real r__1, r__2;
+    char ch__1[1];
+
+    /* Local variables */
+    real dx_x__, dz_z__, ymin;
+    extern /* Subroutine */ int sla_lin_berr_(integer *, integer *, integer *
+	    , real *, real *, real *);
+    real dxratmax;
+    extern /* Subroutine */ int blas_sgemv_x_(integer *, integer *, integer *
+	    , real *, real *, integer *, real *, integer *, real *, real *, 
+	    integer *, integer *);
+    real dzratmax;
+    integer y_prec_state__, i__, j;
+    extern /* Subroutine */ int blas_sgemv2_x_(integer *, integer *, integer 
+	    *, real *, real *, integer *, real *, real *, integer *, real *, 
+	    real *, integer *, integer *), sla_geamv_(integer *, integer *, 
+	    integer *, real *, real *, integer *, real *, integer *, real *, 
+	    real *, integer *), sgemv_(char *, integer *, integer *, real *, 
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    real dxrat;
+    logical incr_prec__;
+    real dzrat;
+    char trans[1];
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
+	    integer *);
+    real normx, normy;
+    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
+	    real *, integer *);
+    real myhugeval, prev_dz_z__, yk;
+    extern real slamch_(char *);
+    real final_dx_x__, final_dz_z__, normdx;
+    extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
+	    integer *, integer *, real *, integer *, integer *), 
+	    sla_wwaddw_(integer *, real *, real *, real *);
+    extern /* Character */ VOID chla_transtype_(char *, integer *);
+    real prevnormdx;
+    integer cnt;
+    real dyk, eps;
+    integer x_state__, z_state__;
+    real incr_thresh__;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    errs_c_dim1 = *nrhs;
+    errs_c_offset = 1 + errs_c_dim1 * 1;
+    errs_c__ -= errs_c_offset;
+    errs_n_dim1 = *nrhs;
+    errs_n_offset = 1 + errs_n_dim1 * 1;
+    errs_n__ -= errs_n_offset;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1 * 1;
+    y -= y_offset;
+    --berr_out__;
+    --res;
+    --ayb;
+    --dy;
+    --y_tail__;
+
+    /* Function Body */
+    if (*info != 0) {
+	return 0;
+    }
+    chla_transtype_(ch__1, trans_type__);
+    *(unsigned char *)trans = *(unsigned char *)&ch__1[0];
+    eps = slamch_("Epsilon");
+    myhugeval = slamch_("Overflow");
+/*     Force MYHUGEVAL to Inf */
+    myhugeval *= myhugeval;
+/*     Using MYHUGEVAL may lead to spurious underflows. */
+    incr_thresh__ = (real) (*n) * eps;
+
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	y_prec_state__ = 1;
+	if (y_prec_state__ == 2) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		y_tail__[i__] = 0.f;
+	    }
+	}
+	dxrat = 0.f;
+	dxratmax = 0.f;
+	dzrat = 0.f;
+	dzratmax = 0.f;
+	final_dx_x__ = myhugeval;
+	final_dz_z__ = myhugeval;
+	prevnormdx = myhugeval;
+	prev_dz_z__ = myhugeval;
+	dz_z__ = myhugeval;
+	dx_x__ = myhugeval;
+	x_state__ = 1;
+	z_state__ = 0;
+	incr_prec__ = FALSE_;
+	i__2 = *ithresh;
+	for (cnt = 1; cnt <= i__2; ++cnt) {
+
+/*         Compute residual RES = B_s - op(A_s) * Y, */
+/*             op(A) = A, A**T, or A**H depending on TRANS (and type). */
+
+	    scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
+	    if (y_prec_state__ == 0) {
+		sgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 
+			1], &c__1, &c_b8, &res[1], &c__1);
+	    } else if (y_prec_state__ == 1) {
+		blas_sgemv_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, &
+			y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1, 
+			prec_type__);
+	    } else {
+		blas_sgemv2_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, 
+			&y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b8, &res[
+			1], &c__1, prec_type__);
+	    }
+/*        XXX: RES is no longer needed. */
+	    scopy_(n, &res[1], &c__1, &dy[1], &c__1);
+	    sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], 
+		    n, info);
+
+/*         Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */
+
+	    normx = 0.f;
+	    normy = 0.f;
+	    normdx = 0.f;
+	    dz_z__ = 0.f;
+	    ymin = myhugeval;
+
+	    i__3 = *n;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		yk = (r__1 = y[i__ + j * y_dim1], abs(r__1));
+		dyk = (r__1 = dy[i__], abs(r__1));
+		if (yk != 0.f) {
+/* Computing MAX */
+		    r__1 = dz_z__, r__2 = dyk / yk;
+		    dz_z__ = f2cmax(r__1,r__2);
+		} else if (dyk != 0.f) {
+		    dz_z__ = myhugeval;
+		}
+		ymin = f2cmin(ymin,yk);
+		normy = f2cmax(normy,yk);
+		if (*colequ) {
+/* Computing MAX */
+		    r__1 = normx, r__2 = yk * c__[i__];
+		    normx = f2cmax(r__1,r__2);
+/* Computing MAX */
+		    r__1 = normdx, r__2 = dyk * c__[i__];
+		    normdx = f2cmax(r__1,r__2);
+		} else {
+		    normx = normy;
+		    normdx = f2cmax(normdx,dyk);
+		}
+	    }
+	    if (normx != 0.f) {
+		dx_x__ = normdx / normx;
+	    } else if (normdx == 0.f) {
+		dx_x__ = 0.f;
+	    } else {
+		dx_x__ = myhugeval;
+	    }
+	    dxrat = normdx / prevnormdx;
+	    dzrat = dz_z__ / prev_dz_z__;
+
+/*         Check termination criteria */
+
+	    if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy 
+		    && y_prec_state__ < 2) {
+		incr_prec__ = TRUE_;
+	    }
+	    if (x_state__ == 3 && dxrat <= *rthresh) {
+		x_state__ = 1;
+	    }
+	    if (x_state__ == 1) {
+		if (dx_x__ <= eps) {
+		    x_state__ = 2;
+		} else if (dxrat > *rthresh) {
+		    if (y_prec_state__ != 2) {
+			incr_prec__ = TRUE_;
+		    } else {
+			x_state__ = 3;
+		    }
+		} else {
+		    if (dxrat > dxratmax) {
+			dxratmax = dxrat;
+		    }
+		}
+		if (x_state__ > 1) {
+		    final_dx_x__ = dx_x__;
+		}
+	    }
+	    if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
+		z_state__ = 1;
+	    }
+	    if (z_state__ == 3 && dzrat <= *rthresh) {
+		z_state__ = 1;
+	    }
+	    if (z_state__ == 1) {
+		if (dz_z__ <= eps) {
+		    z_state__ = 2;
+		} else if (dz_z__ > *dz_ub__) {
+		    z_state__ = 0;
+		    dzratmax = 0.f;
+		    final_dz_z__ = myhugeval;
+		} else if (dzrat > *rthresh) {
+		    if (y_prec_state__ != 2) {
+			incr_prec__ = TRUE_;
+		    } else {
+			z_state__ = 3;
+		    }
+		} else {
+		    if (dzrat > dzratmax) {
+			dzratmax = dzrat;
+		    }
+		}
+		if (z_state__ > 1) {
+		    final_dz_z__ = dz_z__;
+		}
+	    }
+
+/*           Exit if both normwise and componentwise stopped working, */
+/*           but if componentwise is unstable, let it go at least two */
+/*           iterations. */
+
+	    if (x_state__ != 1) {
+		if (*ignore_cwise__) {
+		    goto L666;
+		}
+		if (z_state__ == 3 || z_state__ == 2) {
+		    goto L666;
+		}
+		if (z_state__ == 0 && cnt > 1) {
+		    goto L666;
+		}
+	    }
+	    if (incr_prec__) {
+		incr_prec__ = FALSE_;
+		++y_prec_state__;
+		i__3 = *n;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    y_tail__[i__] = 0.f;
+		}
+	    }
+	    prevnormdx = normdx;
+	    prev_dz_z__ = dz_z__;
+
+/*           Update soluton. */
+
+	    if (y_prec_state__ < 2) {
+		saxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
+	    } else {
+		sla_wwaddw_(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
+	    }
+	}
+/*        Target of "IF (Z_STOP .AND. X_STOP)".  Sun's f77 won't CALL MYEXIT. */
+L666:
+
+/*     Set final_* when cnt hits ithresh. */
+
+	if (x_state__ == 1) {
+	    final_dx_x__ = dx_x__;
+	}
+	if (z_state__ == 1) {
+	    final_dz_z__ = dz_z__;
+	}
+
+/*     Compute error bounds */
+
+	if (*n_norms__ >= 1) {
+	    errs_n__[j + (errs_n_dim1 << 1)] = final_dx_x__ / (1 - dxratmax);
+	}
+	if (*n_norms__ >= 2) {
+	    errs_c__[j + (errs_c_dim1 << 1)] = final_dz_z__ / (1 - dzratmax);
+	}
+
+/*     Compute componentwise relative backward error from formula */
+/*         f2cmax(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
+/*     where abs(Z) is the componentwise absolute value of the matrix */
+/*     or vector Z. */
+
+/*         Compute residual RES = B_s - op(A_s) * Y, */
+/*             op(A) = A, A**T, or A**H depending on TRANS (and type). */
+
+	scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
+	sgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], &
+		c__1, &c_b8, &res[1], &c__1);
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    ayb[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1));
+	}
+
+/*     Compute abs(op(A_s))*abs(Y) + abs(B_s). */
+
+	sla_geamv_(trans_type__, n, n, &c_b8, &a[a_offset], lda, &y[j * 
+		y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1);
+	sla_lin_berr_(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);
+
+/*     End of loop for each RHS. */
+
+    }
+
+    return 0;
+} /* sla_gerfsx_extended__ */
+
diff --git a/lapack-netlib/SRC/sla_gerpvgrw.c b/lapack-netlib/SRC/sla_gerpvgrw.c
new file mode 100644
index 000000000..0fc93c576
--- /dev/null
+++ b/lapack-netlib/SRC/sla_gerpvgrw.c
@@ -0,0 +1,542 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x;
+		for(u = n; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0;
+		else n = -n;
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
+		if (w[i-1]>m) mi=i ,m=w[i-1];
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b SLA_GERPVGRW */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download SLA_GERPVGRW + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sla_ger
+pvgrw.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sla_ger
+pvgrw.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sla_ger
+pvgrw.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       REAL FUNCTION SLA_GERPVGRW( N, NCOLS, A, LDA, AF, LDAF ) */
+
+/*       INTEGER            N, NCOLS, LDA, LDAF */
+/*       REAL               A( LDA, * ), AF( LDAF, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > SLA_GERPVGRW computes the reciprocal pivot growth factor */
+/* > norm(A)/norm(U). The "f2cmax absolute element" norm is used. If this is */
+/* > much less than 1, the stability of the LU factorization of the */
+/* > (equilibrated) matrix A could be poor. This also means that the */
+/* > solution X, estimated condition numbers, and error bounds could be */
+/* > unreliable. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NCOLS */
+/* > \verbatim */
+/* >          NCOLS is INTEGER */
+/* >     The number of columns of the matrix A. NCOLS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is REAL array, dimension (LDA,N) */
+/* >     On entry, the N-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >     The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AF */
+/* > \verbatim */
+/* >          AF is REAL array, dimension (LDAF,N) */
+/* >     The factors L and U from the factorization */
+/* >     A = P*L*U as computed by SGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >     The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup realGEcomputational */
+
+/*  ===================================================================== */
+real sla_gerpvgrw_(integer *n, integer *ncols, real *a, integer *lda, real *
+	af, integer *ldaf)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2;
+    real ret_val, r__1, r__2;
+
+    /* Local variables */
+    real amax, umax;
+    integer i__, j;
+    real rpvgrw;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+
+    /* Function Body */
+    rpvgrw = 1.f;
+    i__1 = *ncols;
+    for (j = 1; j <= i__1; ++j) {
+	amax = 0.f;
+	umax = 0.f;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = (r__1 = a[i__ + j * a_dim1], abs(r__1));
+	    amax = f2cmax(r__2,amax);
+	}
+	i__2 = j;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    r__2 = (r__1 = af[i__ + j * af_dim1], abs(r__1));
+	    umax = f2cmax(r__2,umax);
+	}
+	if (umax != 0.f) {
+/* Computing MIN */
+	    r__1 = amax / umax;
+	    rpvgrw = f2cmin(r__1,rpvgrw);
+	}
+    }
+    ret_val = rpvgrw;
+    return ret_val;
+} /* sla_gerpvgrw__ */
+