From 1ed9c0071a320dae3f5bf555e4bc7a230b58a71b Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 23 Feb 2022 00:06:28 +0100
Subject: [PATCH] Add C versions as fallback

---
 lapack-netlib/SRC/dbbcsd.c   | 1676 +++++++++++++
 lapack-netlib/SRC/dbdsdc.c   |  969 ++++++++
 lapack-netlib/SRC/dbdsqr.c   | 1397 +++++++++++
 lapack-netlib/SRC/dbdsvdx.c  | 1349 ++++++++++
 lapack-netlib/SRC/dcombssq.c |  486 ++++
 lapack-netlib/SRC/ddisna.c   |  651 +++++
 lapack-netlib/SRC/dgbbrd.c   | 1033 ++++++++
 lapack-netlib/SRC/dgbcon.c   |  725 ++++++
 lapack-netlib/SRC/dgbequ.c   |  764 ++++++
 lapack-netlib/SRC/dgbequb.c  |  783 ++++++
 lapack-netlib/SRC/dgbrfs.c   |  919 +++++++
 lapack-netlib/SRC/dgbrfsx.c  | 1181 +++++++++
 lapack-netlib/SRC/dgbsv.c    |  622 +++++
 lapack-netlib/SRC/dgbsvx.c   | 1138 +++++++++
 lapack-netlib/SRC/dgbsvxx.c  | 1249 ++++++++++
 lapack-netlib/SRC/dgbtf2.c   |  698 ++++++
 lapack-netlib/SRC/dgbtrf.c   | 1021 ++++++++
 lapack-netlib/SRC/dgbtrs.c   |  686 ++++++
 lapack-netlib/SRC/dgebak.c   |  675 +++++
 lapack-netlib/SRC/dgebal.c   |  840 +++++++
 lapack-netlib/SRC/dgebd2.c   |  745 ++++++
 lapack-netlib/SRC/dgebrd.c   |  784 ++++++
 lapack-netlib/SRC/dgecon.c   |  658 +++++
 lapack-netlib/SRC/dgeequ.c   |  733 ++++++
 lapack-netlib/SRC/dgeequb.c  |  753 ++++++
 lapack-netlib/SRC/dgees.c    | 1002 ++++++++
 lapack-netlib/SRC/dgeesx.c   | 1124 +++++++++
 lapack-netlib/SRC/dgeev.c    | 1039 ++++++++
 lapack-netlib/SRC/dgeevx.c   | 1206 +++++++++
 lapack-netlib/SRC/dgehd2.c   |  628 +++++
 lapack-netlib/SRC/dgehrd.c   |  789 ++++++
 lapack-netlib/SRC/dgejsv.c   | 2682 ++++++++++++++++++++
 lapack-netlib/SRC/dgelq.c    |  745 ++++++
 lapack-netlib/SRC/dgelq2.c   |  597 +++++
 lapack-netlib/SRC/dgelqf.c   |  700 ++++++
 lapack-netlib/SRC/dgelqt.c   |  621 +++++
 lapack-netlib/SRC/dgelqt3.c  |  679 ++++++
 lapack-netlib/SRC/dgels.c    |  956 ++++++++
 lapack-netlib/SRC/dgelsd.c   | 1153 +++++++++
 lapack-netlib/SRC/dgelss.c   | 1318 ++++++++++
 lapack-netlib/SRC/dgelsy.c   |  945 +++++++
 lapack-netlib/SRC/dgemlq.c   |  684 ++++++
 lapack-netlib/SRC/dgemlqt.c  |  707 ++++++
 lapack-netlib/SRC/dgemqr.c   |  685 ++++++
 lapack-netlib/SRC/dgemqrt.c  |  708 ++++++
 lapack-netlib/SRC/dgeql2.c   |  591 +++++
 lapack-netlib/SRC/dgeqlf.c   |  711 ++++++
 lapack-netlib/SRC/dgeqp3.c   |  795 ++++++
 lapack-netlib/SRC/dgeqr.c    |  735 ++++++
 lapack-netlib/SRC/dgeqr2.c   |  602 +++++
 lapack-netlib/SRC/dgeqr2p.c  |  607 +++++
 lapack-netlib/SRC/dgeqrf.c   |  702 ++++++
 lapack-netlib/SRC/dgeqrfp.c  |  705 ++++++
 lapack-netlib/SRC/dgeqrt.c   |  631 +++++
 lapack-netlib/SRC/dgeqrt2.c  |  648 +++++
 lapack-netlib/SRC/dgeqrt3.c  |  681 ++++++
 lapack-netlib/SRC/dgerfs.c   |  882 +++++++
 lapack-netlib/SRC/dgerfsx.c  | 1148 +++++++++
 lapack-netlib/SRC/dgerq2.c   |  587 +++++
 lapack-netlib/SRC/dgerqf.c   |  710 ++++++
 lapack-netlib/SRC/dgesc2.c   |  604 +++++
 lapack-netlib/SRC/dgesdd.c   | 2167 ++++++++++++++++
 lapack-netlib/SRC/dgesv.c    |  574 +++++
 lapack-netlib/SRC/dgesvd.c   | 4475 ++++++++++++++++++++++++++++++++++
 lapack-netlib/SRC/dgesvdq.c  | 2128 ++++++++++++++++
 lapack-netlib/SRC/dgesvdx.c  | 1342 ++++++++++
 lapack-netlib/SRC/dgesvj.c   | 2233 +++++++++++++++++
 lapack-netlib/SRC/dgesvx.c   | 1068 ++++++++
 lapack-netlib/SRC/dgesvxx.c  | 1211 +++++++++
 lapack-netlib/SRC/dgetc2.c   |  646 +++++
 lapack-netlib/SRC/dgetf2.c   |  620 +++++
 lapack-netlib/SRC/dgetrf.c   |  645 +++++
 lapack-netlib/SRC/dgetrf2.c  |  683 ++++++
 lapack-netlib/SRC/dgetri.c   |  694 ++++++
 lapack-netlib/SRC/dgetrs.c   |  620 +++++
 lapack-netlib/SRC/dgetsls.c  |  931 +++++++
 76 files changed, 72879 insertions(+)
 create mode 100644 lapack-netlib/SRC/dbbcsd.c
 create mode 100644 lapack-netlib/SRC/dbdsdc.c
 create mode 100644 lapack-netlib/SRC/dbdsqr.c
 create mode 100644 lapack-netlib/SRC/dbdsvdx.c
 create mode 100644 lapack-netlib/SRC/dcombssq.c
 create mode 100644 lapack-netlib/SRC/ddisna.c
 create mode 100644 lapack-netlib/SRC/dgbbrd.c
 create mode 100644 lapack-netlib/SRC/dgbcon.c
 create mode 100644 lapack-netlib/SRC/dgbequ.c
 create mode 100644 lapack-netlib/SRC/dgbequb.c
 create mode 100644 lapack-netlib/SRC/dgbrfs.c
 create mode 100644 lapack-netlib/SRC/dgbrfsx.c
 create mode 100644 lapack-netlib/SRC/dgbsv.c
 create mode 100644 lapack-netlib/SRC/dgbsvx.c
 create mode 100644 lapack-netlib/SRC/dgbsvxx.c
 create mode 100644 lapack-netlib/SRC/dgbtf2.c
 create mode 100644 lapack-netlib/SRC/dgbtrf.c
 create mode 100644 lapack-netlib/SRC/dgbtrs.c
 create mode 100644 lapack-netlib/SRC/dgebak.c
 create mode 100644 lapack-netlib/SRC/dgebal.c
 create mode 100644 lapack-netlib/SRC/dgebd2.c
 create mode 100644 lapack-netlib/SRC/dgebrd.c
 create mode 100644 lapack-netlib/SRC/dgecon.c
 create mode 100644 lapack-netlib/SRC/dgeequ.c
 create mode 100644 lapack-netlib/SRC/dgeequb.c
 create mode 100644 lapack-netlib/SRC/dgees.c
 create mode 100644 lapack-netlib/SRC/dgeesx.c
 create mode 100644 lapack-netlib/SRC/dgeev.c
 create mode 100644 lapack-netlib/SRC/dgeevx.c
 create mode 100644 lapack-netlib/SRC/dgehd2.c
 create mode 100644 lapack-netlib/SRC/dgehrd.c
 create mode 100644 lapack-netlib/SRC/dgejsv.c
 create mode 100644 lapack-netlib/SRC/dgelq.c
 create mode 100644 lapack-netlib/SRC/dgelq2.c
 create mode 100644 lapack-netlib/SRC/dgelqf.c
 create mode 100644 lapack-netlib/SRC/dgelqt.c
 create mode 100644 lapack-netlib/SRC/dgelqt3.c
 create mode 100644 lapack-netlib/SRC/dgels.c
 create mode 100644 lapack-netlib/SRC/dgelsd.c
 create mode 100644 lapack-netlib/SRC/dgelss.c
 create mode 100644 lapack-netlib/SRC/dgelsy.c
 create mode 100644 lapack-netlib/SRC/dgemlq.c
 create mode 100644 lapack-netlib/SRC/dgemlqt.c
 create mode 100644 lapack-netlib/SRC/dgemqr.c
 create mode 100644 lapack-netlib/SRC/dgemqrt.c
 create mode 100644 lapack-netlib/SRC/dgeql2.c
 create mode 100644 lapack-netlib/SRC/dgeqlf.c
 create mode 100644 lapack-netlib/SRC/dgeqp3.c
 create mode 100644 lapack-netlib/SRC/dgeqr.c
 create mode 100644 lapack-netlib/SRC/dgeqr2.c
 create mode 100644 lapack-netlib/SRC/dgeqr2p.c
 create mode 100644 lapack-netlib/SRC/dgeqrf.c
 create mode 100644 lapack-netlib/SRC/dgeqrfp.c
 create mode 100644 lapack-netlib/SRC/dgeqrt.c
 create mode 100644 lapack-netlib/SRC/dgeqrt2.c
 create mode 100644 lapack-netlib/SRC/dgeqrt3.c
 create mode 100644 lapack-netlib/SRC/dgerfs.c
 create mode 100644 lapack-netlib/SRC/dgerfsx.c
 create mode 100644 lapack-netlib/SRC/dgerq2.c
 create mode 100644 lapack-netlib/SRC/dgerqf.c
 create mode 100644 lapack-netlib/SRC/dgesc2.c
 create mode 100644 lapack-netlib/SRC/dgesdd.c
 create mode 100644 lapack-netlib/SRC/dgesv.c
 create mode 100644 lapack-netlib/SRC/dgesvd.c
 create mode 100644 lapack-netlib/SRC/dgesvdq.c
 create mode 100644 lapack-netlib/SRC/dgesvdx.c
 create mode 100644 lapack-netlib/SRC/dgesvj.c
 create mode 100644 lapack-netlib/SRC/dgesvx.c
 create mode 100644 lapack-netlib/SRC/dgesvxx.c
 create mode 100644 lapack-netlib/SRC/dgetc2.c
 create mode 100644 lapack-netlib/SRC/dgetf2.c
 create mode 100644 lapack-netlib/SRC/dgetrf.c
 create mode 100644 lapack-netlib/SRC/dgetrf2.c
 create mode 100644 lapack-netlib/SRC/dgetri.c
 create mode 100644 lapack-netlib/SRC/dgetrs.c
 create mode 100644 lapack-netlib/SRC/dgetsls.c

diff --git a/lapack-netlib/SRC/dbbcsd.c b/lapack-netlib/SRC/dbbcsd.c
new file mode 100644
index 000000000..7524cf43e
--- /dev/null
+++ b/lapack-netlib/SRC/dbbcsd.c
@@ -0,0 +1,1676 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single-precision complex stored as a (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision counterpart */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only; assumes {r, i} is laid out like _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast only; assumes {r, i} is laid out like _Complex double */
+#define pCf(z) (*_pCf(z))	/* lvalue through which the macros below store a complex result */
+#define pCd(z) (*_pCd(z))	/* double-precision form of pCf */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: parameter record for an external read or write. */
+typedef struct
+{	flag cierr;	/* flag and ftnint are both typedefs of int */
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: parameter record for an internal read or write; here the unit (iciunit) is a char pointer, not an integer. */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: parameter record for an open. */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;	/* presumably the length of ofnm (paired by name) - TODO confirm */
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: parameter record for a close. */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: parameter record shared by rewind, backspace and endfile. */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: parameter record for an inquire; most string members are followed by a matching length member. */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* declared ftnint where the sibling lengths use ftnlen; both are typedefs of int */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type declared in this header */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */	/* disabled: no longint typedef is declared in the visible part of this header */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;	/* NOTE(review): the encoding of the type code is not shown in this file - confirm before use */
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;	/* presumably the number of entries in vars - TODO confirm */
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* takes over from the <stdlib.h> abs(); x may be evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* each argument may be evaluated twice: avoid side effects */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* each argument may be evaluated twice: avoid side effects */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)	/* value of bit b of a, as 0 or 1 */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))	/* a with bit b cleared */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))	/* a with bit b set */
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }	/* sig_die in this header ignores both arguments and calls exit(1) */
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* goes through double-precision ccos; the result is narrowed when stored */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))	/* the d_ and r_ macros take pointers, following the by-reference argument convention */
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* goes through double-precision conj; narrowed when stored */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference: a - b when a > b, otherwise 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* goes through double-precision cimag */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* the constant is log10(e), so this is log10(*x) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* the constant is log10(e), so this is log10(*x) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves away from zero; argument evaluated twice */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| when b >= 0, otherwise -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))	/* expands the abs macro defined in this header, not the library function */
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* s is ignored; the length passed alongside it is returned */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))	/* the *pow_ui helpers are static functions defined later in this header */
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* *R = (*A)**(*B); all three are doublecomplex pointers, so B is converted with Cd() like A */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars, stops at a NUL in B, never pads A */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";	/* version tag string; not referenced in the visible part of this file */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* the expansion already ends in a semicolon */
+#define mycycle() continue;	/* the expansion already ends in a semicolon */
+#define myceiling(w) (ceil(w))	/* parenthesised rather than brace-wrapped so the expansion is a usable expression */
+#define myhuge(w) (HUGE_VAL)	/* w is ignored; parenthesised so it can appear inside expressions */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))	/* yields the 1-based offset returned by dmaxloc_ (always the double-precision search) */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++: pointer to a variadic function returning logical */
+#else
+typedef logical (*L_fp)();	/* C: pointer to a function returning logical, parameter list left unspecified */
+#endif
+
+/* Return x**n for integer n by square-and-multiply; n < 0 gives (1/x)**|n| and n == 0 gives 1. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n as a signed integer overflows (undefined) for the most negative value. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	while(u != 0) {
+		if(u & 01) result *= x;	/* the current low bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Return x**n for integer n by square-and-multiply; n < 0 gives (1/x)**|n| and n == 0 gives 1. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n as a signed integer overflows (undefined) for the most negative value. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	while(u != 0) {
+		if(u & 01) result *= x;	/* the current low bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Return the complex x raised to the integer power n by square-and-multiply; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n as a signed integer overflows (undefined) for the most negative value. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	while(u != 0) {
+		if(u & 01) result *= x;	/* the current low bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Return the complex x raised to the integer power n by square-and-multiply; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n as a signed integer overflows (undefined) for the most negative value. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	while(u != 0) {
+		if(u & 01) result *= x;	/* the current low bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Integer power x**n: 1 when n == 0 or x == 1; for n < 0 the result is 0 unless x is 1 or -1 (x == 0 divides by zero). */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int u;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* NOTE(review): for x == 0 this divides by zero, presumably on purpose to raise the arithmetic trap - confirm. */
+		return x == 0 ? 1/x : 0;
+	}
+	/* With n < 0 only x == -1 reaches this point; |n| is taken unsigned because negating the most negative n overflows. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	while (u != 0) {
+		if(u & 01) result *= x;	/* the current low bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based offset, counted from s, of the first largest entry in w[s-1] .. w[e-1]; n is not used. */
+	integer best = s, k = s;
+	double top = w[s-1];
+	while(++k <= e)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based offset, counted from s, of the first largest entry in w[s-1] .. w[e-1]; n is not used. */
+	integer best = s, k = s;
+	float top = w[s-1];
+	while(++k <= e)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+/* Conjugated dot product: stores sum over k = 0..n-1 of conj(x[k]) * y[k] through z (0 when n <= 0).
+   A negative increment walks its vector backwards from offset (1-n)*inc, as the reference BLAS CDOTC does. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end so the offset never goes below 0 */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product: stores sum over k = 0..n-1 of conj(x[k]) * y[k] through z (0 when n <= 0).
+   A negative increment walks its vector backwards from offset (1-n)*inc, as the reference BLAS ZDOTC does. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end so the offset never goes below 0 */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product: stores sum over k = 0..n-1 of x[k] * y[k] through z (0 when n <= 0).
+   A negative increment walks its vector backwards from offset (1-n)*inc, as the reference BLAS CDOTU does. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end so the offset never goes below 0 */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product: stores sum over k = 0..n-1 of x[k] * y[k] through z (0 when n <= 0).
+   A negative increment walks its vector backwards from offset (1-n)*inc, as the reference BLAS ZDOTU does. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start at the far end so the offset never goes below 0 */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static doublereal c_b10 = -.125;
+static doublereal c_b35 = -1.;
+static integer c__1 = 1;
+
+/* > \brief \b DBBCSD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DBBCSD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbbcsd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbbcsd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbbcsd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, */
+/*                          THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, */
+/*                          V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, */
+/*                          B22D, B22E, WORK, LWORK, INFO ) */
+
+/*       CHARACTER          JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS */
+/*       INTEGER            INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q */
+/*       DOUBLE PRECISION   B11D( * ), B11E( * ), B12D( * ), B12E( * ), */
+/*      $                   B21D( * ), B21E( * ), B22D( * ), B22E( * ), */
+/*      $                   PHI( * ), THETA( * ), WORK( * ) */
+/*       DOUBLE PRECISION   U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), */
+/*      $                   V2T( LDV2T, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DBBCSD computes the CS decomposition of an orthogonal matrix in */
+/* > bidiagonal-block form, */
+/* > */
+/* > */
+/* >     [ B11 | B12 0  0 ] */
+/* >     [  0  |  0 -I  0 ] */
+/* > X = [----------------] */
+/* >     [ B21 | B22 0  0 ] */
+/* >     [  0  |  0  0  I ] */
+/* > */
+/* >                               [  C | -S  0  0 ] */
+/* >                   [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**T */
+/* >                 = [---------] [---------------] [---------]   . */
+/* >                   [    | U2 ] [  S |  C  0  0 ] [    | V2 ] */
+/* >                               [  0 |  0  0  I ] */
+/* > */
+/* > X is M-by-M, its top-left block is P-by-Q, and Q must be no larger */
+/* > than P, M-P, or M-Q. (If Q is not the smallest index, then X must be */
+/* > transposed and/or permuted. This can be done in constant time using */
+/* > the TRANS and SIGNS options. See DORCSD for details.) */
+/* > */
+/* > The bidiagonal matrices B11, B12, B21, and B22 are represented */
+/* > implicitly by angles THETA(1:Q) and PHI(1:Q-1). */
+/* > */
+/* > The orthogonal matrices U1, U2, V1T, and V2T are input/output. */
+/* > The input matrices are pre- or post-multiplied by the appropriate */
+/* > singular vector matrices. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBU1 */
+/* > \verbatim */
+/* >          JOBU1 is CHARACTER */
+/* >          = 'Y':      U1 is updated; */
+/* >          otherwise:  U1 is not updated. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBU2 */
+/* > \verbatim */
+/* >          JOBU2 is CHARACTER */
+/* >          = 'Y':      U2 is updated; */
+/* >          otherwise:  U2 is not updated. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV1T */
+/* > \verbatim */
+/* >          JOBV1T is CHARACTER */
+/* >          = 'Y':      V1T is updated; */
+/* >          otherwise:  V1T is not updated. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV2T */
+/* > \verbatim */
+/* >          JOBV2T is CHARACTER */
+/* >          = 'Y':      V2T is updated; */
+/* >          otherwise:  V2T is not updated. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER */
+/* >          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major */
+/* >                      order; */
+/* >          otherwise:  X, U1, U2, V1T, and V2T are stored in column- */
+/* >                      major order. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows and columns in X, the orthogonal matrix in */
+/* >          bidiagonal-block form. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] P */
+/* > \verbatim */
+/* >          P is INTEGER */
+/* >          The number of rows in the top-left block of X. 0 <= P <= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] Q */
+/* > \verbatim */
+/* >          Q is INTEGER */
+/* >          The number of columns in the top-left block of X. */
+/* >          0 <= Q <= MIN(P,M-P,M-Q). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] THETA */
+/* > \verbatim */
+/* >          THETA is DOUBLE PRECISION array, dimension (Q) */
+/* >          On entry, the angles THETA(1),...,THETA(Q) that, along with */
+/* >          PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block */
+/* >          form. On exit, the angles whose cosines and sines define the */
+/* >          diagonal blocks in the CS decomposition. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] PHI */
+/* > \verbatim */
+/* >          PHI is DOUBLE PRECISION array, dimension (Q-1) */
+/* >          The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., */
+/* >          THETA(Q), define the matrix in bidiagonal-block form. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] U1 */
+/* > \verbatim */
+/* >          U1 is DOUBLE PRECISION array, dimension (LDU1,P) */
+/* >          On entry, a P-by-P matrix. On exit, U1 is postmultiplied */
+/* >          by the left singular vector matrix common to [ B11 ; 0 ] and */
+/* >          [ B12 0 0 ; 0 -I 0 0 ]. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU1 */
+/* > \verbatim */
+/* >          LDU1 is INTEGER */
+/* >          The leading dimension of the array U1, LDU1 >= MAX(1,P). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] U2 */
+/* > \verbatim */
+/* >          U2 is DOUBLE PRECISION array, dimension (LDU2,M-P) */
+/* >          On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is */
+/* >          postmultiplied by the left singular vector matrix common to */
+/* >          [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ]. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU2 */
+/* > \verbatim */
+/* >          LDU2 is INTEGER */
+/* >          The leading dimension of the array U2, LDU2 >= MAX(1,M-P). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V1T */
+/* > \verbatim */
+/* >          V1T is DOUBLE PRECISION array, dimension (LDV1T,Q) */
+/* >          On entry, a Q-by-Q matrix. On exit, V1T is premultiplied */
+/* >          by the transpose of the right singular vector */
+/* >          matrix common to [ B11 ; 0 ] and [ B21 ; 0 ]. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV1T */
+/* > \verbatim */
+/* >          LDV1T is INTEGER */
+/* >          The leading dimension of the array V1T, LDV1T >= MAX(1,Q). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V2T */
+/* > \verbatim */
+/* >          V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q) */
+/* >          On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is */
+/* >          premultiplied by the transpose of the right */
+/* >          singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and */
+/* >          [ B22 0 0 ; 0 0 I ]. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV2T */
+/* > \verbatim */
+/* >          LDV2T is INTEGER */
+/* >          The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B11D */
+/* > \verbatim */
+/* >          B11D is DOUBLE PRECISION array, dimension (Q) */
+/* >          When DBBCSD converges, B11D contains the cosines of THETA(1), */
+/* >          ..., THETA(Q). If DBBCSD fails to converge, then B11D */
+/* >          contains the diagonal of the partially reduced top-left */
+/* >          block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B11E */
+/* > \verbatim */
+/* >          B11E is DOUBLE PRECISION array, dimension (Q-1) */
+/* >          When DBBCSD converges, B11E contains zeros. If DBBCSD fails */
+/* >          to converge, then B11E contains the superdiagonal of the */
+/* >          partially reduced top-left block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B12D */
+/* > \verbatim */
+/* >          B12D is DOUBLE PRECISION array, dimension (Q) */
+/* >          When DBBCSD converges, B12D contains the negative sines of */
+/* >          THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then */
+/* >          B12D contains the diagonal of the partially reduced top-right */
+/* >          block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B12E */
+/* > \verbatim */
+/* >          B12E is DOUBLE PRECISION array, dimension (Q-1) */
+/* >          When DBBCSD converges, B12E contains zeros. If DBBCSD fails */
+/* >          to converge, then B12E contains the subdiagonal of the */
+/* >          partially reduced top-right block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B21D */
+/* > \verbatim */
+/* >          B21D is DOUBLE PRECISION  array, dimension (Q) */
+/* >          When DBBCSD converges, B21D contains the negative sines of */
+/* >          THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then */
+/* >          B21D contains the diagonal of the partially reduced bottom-left */
+/* >          block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B21E */
+/* > \verbatim */
+/* >          B21E is DOUBLE PRECISION  array, dimension (Q-1) */
+/* >          When DBBCSD converges, B21E contains zeros. If DBBCSD fails */
+/* >          to converge, then B21E contains the subdiagonal of the */
+/* >          partially reduced bottom-left block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B22D */
+/* > \verbatim */
+/* >          B22D is DOUBLE PRECISION  array, dimension (Q) */
+/* >          When DBBCSD converges, B22D contains the negative sines of */
+/* >          THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then */
+/* >          B22D contains the diagonal of the partially reduced bottom-right */
+/* >          block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] B22E */
+/* > \verbatim */
+/* >          B22E is DOUBLE PRECISION  array, dimension (Q-1) */
+/* >          When DBBCSD converges, B22E contains zeros. If DBBCSD fails */
+/* >          to converge, then B22E contains the subdiagonal of the */
+/* >          partially reduced bottom-right block. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= MAX(1,8*Q). */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the */
+/* >          routine only calculates the optimal size of the WORK array, */
+/* >          returns this value as the first entry of the work array, and */
+/* >          no error message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if DBBCSD did not converge, INFO specifies the number */
+/* >                of nonzero entries in PHI, and B11D, B11E, etc., */
+/* >                contain the partially reduced matrix. */
+/* > \endverbatim */
+
+/* > \par Internal Parameters: */
+/*  ========================= */
+/* > */
+/* > \verbatim */
+/* >  TOLMUL  DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8))) */
+/* >          TOLMUL controls the convergence criterion of the QR loop. */
+/* >          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they */
+/* >          are within TOLMUL*EPS of either bound. */
+/* > \endverbatim */
+
+/* > \par References: */
+/*  ================ */
+/* > */
+/* >  [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. */
+/* >      Algorithms, 50(1):33-65, 2009. */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleOTHERcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dbbcsd_(char *jobu1, char *jobu2, char *jobv1t, char *
+	jobv2t, char *trans, integer *m, integer *p, integer *q, doublereal *
+	theta, doublereal *phi, doublereal *u1, integer *ldu1, doublereal *u2,
+	 integer *ldu2, doublereal *v1t, integer *ldv1t, doublereal *v2t, 
+	integer *ldv2t, doublereal *b11d, doublereal *b11e, doublereal *b12d, 
+	doublereal *b12e, doublereal *b21d, doublereal *b21e, doublereal *
+	b22d, doublereal *b22e, doublereal *work, integer *lwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer u1_dim1, u1_offset, u2_dim1, u2_offset, v1t_dim1, v1t_offset, 
+	    v2t_dim1, v2t_offset, i__1, i__2;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Local variables */
+    integer imin, mini, imax, iter;
+    doublereal unfl, temp;
+    logical colmajor;
+    doublereal thetamin, thetamax;
+    logical restart11, restart12, restart21, restart22;
+    extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal 
+	    *, doublereal *, doublereal *);
+    integer lworkmin, iu1cs, iu2cs, iu1sn, iu2sn, lworkopt, i__, j;
+    doublereal r__;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, 
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *
+	    , doublereal *, integer *);
+    integer maxit;
+    doublereal dummy, x1, x2, y1, y2;
+    integer iv1tcs, iv2tcs;
+    logical wantu1, wantu2;
+    integer iv1tsn, iv2tsn;
+    extern doublereal dlamch_(char *);
+    doublereal mu, nu, sigma11, sigma21;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    doublereal thresh, tolmul;
+    /* thresh = max(tolmul*eps, 6*q*q*unfl): cut-off for all deflation tests */
+    logical lquery;
+    doublereal b11bulge;
+    logical wantv1t, wantv2t;
+    doublereal b12bulge, b21bulge, b22bulge, eps, tol;
+    extern /* Subroutine */ int dlartgp_(doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *), dlartgs_(doublereal *, 
+	    doublereal *, doublereal *, doublereal *, doublereal *);
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  =================================================================== */
+/*  Outline: validate the arguments and the workspace size, then sweep over */
+/*  the block [imin,imax] until PHI is fully deflated or MAXIT is exceeded, */
+/*  and finally sort THETA in ascending order together with the vectors. */
+/*     Test input arguments */
+
+    /* Parameter adjustments */
+    --theta;
+    --phi;
+    u1_dim1 = *ldu1;
+    u1_offset = 1 + u1_dim1 * 1;
+    u1 -= u1_offset;
+    u2_dim1 = *ldu2;
+    u2_offset = 1 + u2_dim1 * 1;
+    u2 -= u2_offset;
+    v1t_dim1 = *ldv1t;
+    v1t_offset = 1 + v1t_dim1 * 1;
+    v1t -= v1t_offset;
+    v2t_dim1 = *ldv2t;
+    v2t_offset = 1 + v2t_dim1 * 1;
+    v2t -= v2t_offset;
+    --b11d;
+    --b11e;
+    --b12d;
+    --b12e;
+    --b21d;
+    --b21e;
+    --b22d;
+    --b22e;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantu1 = lsame_(jobu1, "Y");
+    wantu2 = lsame_(jobu2, "Y");
+    wantv1t = lsame_(jobv1t, "Y");
+    wantv2t = lsame_(jobv2t, "Y");
+    colmajor = ! lsame_(trans, "T");
+
+    if (*m < 0) {
+	*info = -6;
+    } else if (*p < 0 || *p > *m) {
+	*info = -7;
+    } else if (*q < 0 || *q > *m) {
+	*info = -8;
+    } else if (*q > *p || *q > *m - *p || *q > *m - *q) {
+	*info = -8;
+    } else if (wantu1 && *ldu1 < *p) {
+	*info = -12;
+    } else if (wantu2 && *ldu2 < *m - *p) {
+	*info = -14;
+    } else if (wantv1t && *ldv1t < *q) {
+	*info = -16;
+    } else if (wantv2t && *ldv2t < *m - *q) {
+	*info = -18;
+    }
+
+/*     Quick return if Q = 0 */
+
+    if (*info == 0 && *q == 0) {
+	lworkmin = 1;
+	work[1] = (doublereal) lworkmin;
+	return 0;
+    }
+
+/*     Compute workspace */
+
+    if (*info == 0) {
+	iu1cs = 1;
+	iu1sn = iu1cs + *q;
+	iu2cs = iu1sn + *q;
+	iu2sn = iu2cs + *q;
+	iv1tcs = iu2sn + *q;
+	iv1tsn = iv1tcs + *q;
+	iv2tcs = iv1tsn + *q;
+	iv2tsn = iv2tcs + *q;
+	lworkopt = iv2tsn + *q - 1;
+	lworkmin = lworkopt;
+	work[1] = (doublereal) lworkopt;
+	if (*lwork < lworkmin && ! lquery) {
+	    *info = -28;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DBBCSD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("Epsilon");
+    unfl = dlamch_("Safe minimum");
+/* Computing TOLMUL = MAX(10, MIN(100, EPS**c_b10)); c_b10 is a constant */
+/* defined at file scope, outside this routine */
+    d__3 = 100., d__4 = pow_dd(&eps, &c_b10);
+    d__1 = 10., d__2 = f2cmin(d__3,d__4);
+    tolmul = f2cmax(d__1,d__2);
+    tol = tolmul * eps;
+/* Computing THRESH = MAX(TOL, 6*Q*Q*UNFL) */
+    d__1 = tol, d__2 = *q * 6 * *q * unfl;
+    thresh = f2cmax(d__1,d__2);
+
+/*     Test for negligible sines or cosines */
+
+    i__1 = *q;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (theta[i__] < thresh) {
+	    theta[i__] = 0.;
+	} else if (theta[i__] > 1.57079632679489662 - thresh) {
+	    theta[i__] = 1.57079632679489662;
+	}
+    }
+    i__1 = *q - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (phi[i__] < thresh) {
+	    phi[i__] = 0.;
+	} else if (phi[i__] > 1.57079632679489662 - thresh) {
+	    phi[i__] = 1.57079632679489662;
+	}
+    }
+
+/*     Initial deflation */
+
+    imax = *q;
+    while(imax > 1) {
+	if (phi[imax - 1] != 0.) {
+	    break;
+	}
+	--imax;
+    }
+    imin = imax - 1;
+    if (imin > 1) {
+	while(phi[imin - 1] != 0.) {
+	    --imin;
+	    if (imin <= 1) {
+		break;
+	    }
+	}
+    }
+
+/*     Initialize iteration counter */
+
+    maxit = *q * 6 * *q;
+    iter = 0;
+
+/*     Begin main iteration loop */
+
+    while(imax > 1) {
+
+/*        Compute the matrix entries */
+
+	b11d[imin] = cos(theta[imin]);
+	b21d[imin] = -sin(theta[imin]);
+	i__1 = imax - 1;
+	for (i__ = imin; i__ <= i__1; ++i__) {
+	    b11e[i__] = -sin(theta[i__]) * sin(phi[i__]);
+	    b11d[i__ + 1] = cos(theta[i__ + 1]) * cos(phi[i__]);
+	    b12d[i__] = sin(theta[i__]) * cos(phi[i__]);
+	    b12e[i__] = cos(theta[i__ + 1]) * sin(phi[i__]);
+	    b21e[i__] = -cos(theta[i__]) * sin(phi[i__]);
+	    b21d[i__ + 1] = -sin(theta[i__ + 1]) * cos(phi[i__]);
+	    b22d[i__] = cos(theta[i__]) * cos(phi[i__]);
+	    b22e[i__] = -sin(theta[i__ + 1]) * sin(phi[i__]);
+	}
+	b12d[imax] = sin(theta[imax]);
+	b22d[imax] = cos(theta[imax]);
+
+/*        Abort if not converging; otherwise, increment ITER. On abort, */
+/*        INFO counts the nonzero entries of PHI(1:Q-1). */
+	if (iter > maxit) {
+	    *info = 0;
+	    i__1 = *q - 1;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (phi[i__] != 0.) {
+		    ++(*info);
+		}
+	    }
+	    return 0;
+	}
+
+	iter = iter + imax - imin;
+
+/*        Compute shifts */
+
+	thetamax = theta[imin];
+	thetamin = theta[imin];
+	i__1 = imax;
+	for (i__ = imin + 1; i__ <= i__1; ++i__) {
+	    if (theta[i__] > thetamax) {
+		thetamax = theta[i__];
+	    }
+	    if (theta[i__] < thetamin) {
+		thetamin = theta[i__];
+	    }
+	}
+
+	if (thetamax > 1.57079632679489662 - thresh) {
+
+/*           Zero on diagonals of B11 and B22; induce deflation with a */
+/*           zero shift */
+
+	    mu = 0.;
+	    nu = 1.;
+
+	} else if (thetamin < thresh) {
+
+/*           Zero on diagonals of B12 and B22; induce deflation with a */
+/*           zero shift */
+
+	    mu = 1.;
+	    nu = 0.;
+
+	} else {
+
+/*           Compute shifts for B11 and B21 and use the lesser */
+
+	    dlas2_(&b11d[imax - 1], &b11e[imax - 1], &b11d[imax], &sigma11, &
+		    dummy);
+	    dlas2_(&b21d[imax - 1], &b21e[imax - 1], &b21d[imax], &sigma21, &
+		    dummy);
+
+	    if (sigma11 <= sigma21) {
+		mu = sigma11;
+/* Computing 2nd power */
+		d__1 = mu;
+		nu = sqrt(1. - d__1 * d__1);
+		if (mu < thresh) {
+		    mu = 0.;
+		    nu = 1.;
+		}
+	    } else {
+		nu = sigma21;
+/* Computing 2nd power */
+		d__1 = nu;
+		mu = sqrt(1. - d__1 * d__1);
+		if (nu < thresh) {
+		    mu = 1.;
+		    nu = 0.;
+		}
+	    }
+	}
+
+/*        Rotate to produce bulges in B11 and B21 */
+
+	if (mu <= nu) {
+	    dlartgs_(&b11d[imin], &b11e[imin], &mu, &work[iv1tcs + imin - 1], 
+		    &work[iv1tsn + imin - 1]);
+	} else {
+	    dlartgs_(&b21d[imin], &b21e[imin], &nu, &work[iv1tcs + imin - 1], 
+		    &work[iv1tsn + imin - 1]);
+	}
+
+	temp = work[iv1tcs + imin - 1] * b11d[imin] + work[iv1tsn + imin - 1] 
+		* b11e[imin];
+	b11e[imin] = work[iv1tcs + imin - 1] * b11e[imin] - work[iv1tsn + 
+		imin - 1] * b11d[imin];
+	b11d[imin] = temp;
+	b11bulge = work[iv1tsn + imin - 1] * b11d[imin + 1];
+	b11d[imin + 1] = work[iv1tcs + imin - 1] * b11d[imin + 1];
+	temp = work[iv1tcs + imin - 1] * b21d[imin] + work[iv1tsn + imin - 1] 
+		* b21e[imin];
+	b21e[imin] = work[iv1tcs + imin - 1] * b21e[imin] - work[iv1tsn + 
+		imin - 1] * b21d[imin];
+	b21d[imin] = temp;
+	b21bulge = work[iv1tsn + imin - 1] * b21d[imin + 1];
+	b21d[imin + 1] = work[iv1tcs + imin - 1] * b21d[imin + 1];
+
+/*        Compute THETA(IMIN) */
+
+/* Computing 2nd power */
+	d__1 = b21d[imin];
+/* Computing 2nd power */
+	d__2 = b21bulge;
+/* Computing 2nd power */
+	d__3 = b11d[imin];
+/* Computing 2nd power */
+	d__4 = b11bulge;
+	theta[imin] = atan2(sqrt(d__1 * d__1 + d__2 * d__2), sqrt(d__3 * d__3 
+		+ d__4 * d__4));
+
+/*        Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN) */
+
+/* Computing 2nd power */
+	d__1 = b11d[imin];
+/* Computing 2nd power */
+	d__2 = b11bulge;
+/* Computing 2nd power */
+	d__3 = thresh;
+	if (d__1 * d__1 + d__2 * d__2 > d__3 * d__3) {
+	    dlartgp_(&b11bulge, &b11d[imin], &work[iu1sn + imin - 1], &work[
+		    iu1cs + imin - 1], &r__);
+	} else if (mu <= nu) {
+	    dlartgs_(&b11e[imin], &b11d[imin + 1], &mu, &work[iu1cs + imin - 
+		    1], &work[iu1sn + imin - 1]);
+	} else {
+	    dlartgs_(&b12d[imin], &b12e[imin], &nu, &work[iu1cs + imin - 1], &
+		    work[iu1sn + imin - 1]);
+	}
+/* Computing 2nd power */
+	d__1 = b21d[imin];
+/* Computing 2nd power */
+	d__2 = b21bulge;
+/* Computing 2nd power */
+	d__3 = thresh;
+	if (d__1 * d__1 + d__2 * d__2 > d__3 * d__3) {
+	    dlartgp_(&b21bulge, &b21d[imin], &work[iu2sn + imin - 1], &work[
+		    iu2cs + imin - 1], &r__);
+	} else if (nu < mu) {
+	    dlartgs_(&b21e[imin], &b21d[imin + 1], &nu, &work[iu2cs + imin - 
+		    1], &work[iu2sn + imin - 1]);
+	} else {
+	    dlartgs_(&b22d[imin], &b22e[imin], &mu, &work[iu2cs + imin - 1], &
+		    work[iu2sn + imin - 1]);
+	}
+	work[iu2cs + imin - 1] = -work[iu2cs + imin - 1];
+	work[iu2sn + imin - 1] = -work[iu2sn + imin - 1];
+
+	temp = work[iu1cs + imin - 1] * b11e[imin] + work[iu1sn + imin - 1] * 
+		b11d[imin + 1];
+	b11d[imin + 1] = work[iu1cs + imin - 1] * b11d[imin + 1] - work[iu1sn 
+		+ imin - 1] * b11e[imin];
+	b11e[imin] = temp;
+	if (imax > imin + 1) {
+	    b11bulge = work[iu1sn + imin - 1] * b11e[imin + 1];
+	    b11e[imin + 1] = work[iu1cs + imin - 1] * b11e[imin + 1];
+	}
+	temp = work[iu1cs + imin - 1] * b12d[imin] + work[iu1sn + imin - 1] * 
+		b12e[imin];
+	b12e[imin] = work[iu1cs + imin - 1] * b12e[imin] - work[iu1sn + imin 
+		- 1] * b12d[imin];
+	b12d[imin] = temp;
+	b12bulge = work[iu1sn + imin - 1] * b12d[imin + 1];
+	b12d[imin + 1] = work[iu1cs + imin - 1] * b12d[imin + 1];
+	temp = work[iu2cs + imin - 1] * b21e[imin] + work[iu2sn + imin - 1] * 
+		b21d[imin + 1];
+	b21d[imin + 1] = work[iu2cs + imin - 1] * b21d[imin + 1] - work[iu2sn 
+		+ imin - 1] * b21e[imin];
+	b21e[imin] = temp;
+	if (imax > imin + 1) {
+	    b21bulge = work[iu2sn + imin - 1] * b21e[imin + 1];
+	    b21e[imin + 1] = work[iu2cs + imin - 1] * b21e[imin + 1];
+	}
+	temp = work[iu2cs + imin - 1] * b22d[imin] + work[iu2sn + imin - 1] * 
+		b22e[imin];
+	b22e[imin] = work[iu2cs + imin - 1] * b22e[imin] - work[iu2sn + imin 
+		- 1] * b22d[imin];
+	b22d[imin] = temp;
+	b22bulge = work[iu2sn + imin - 1] * b22d[imin + 1];
+	b22d[imin + 1] = work[iu2cs + imin - 1] * b22d[imin + 1];
+
+/*        Inner loop: chase bulges from B11(IMIN,IMIN+2), */
+/*        B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to */
+/*        bottom-right */
+
+	i__1 = imax - 1;
+	for (i__ = imin + 1; i__ <= i__1; ++i__) {
+
+/*           Compute PHI(I-1) */
+
+	    x1 = sin(theta[i__ - 1]) * b11e[i__ - 1] + cos(theta[i__ - 1]) * 
+		    b21e[i__ - 1];
+	    x2 = sin(theta[i__ - 1]) * b11bulge + cos(theta[i__ - 1]) * 
+		    b21bulge;
+	    y1 = sin(theta[i__ - 1]) * b12d[i__ - 1] + cos(theta[i__ - 1]) * 
+		    b22d[i__ - 1];
+	    y2 = sin(theta[i__ - 1]) * b12bulge + cos(theta[i__ - 1]) * 
+		    b22bulge;
+
+/* Computing 2nd power */
+	    d__1 = x1;
+/* Computing 2nd power */
+	    d__2 = x2;
+/* Computing 2nd power */
+	    d__3 = y1;
+/* Computing 2nd power */
+	    d__4 = y2;
+	    phi[i__ - 1] = atan2(sqrt(d__1 * d__1 + d__2 * d__2), sqrt(d__3 * 
+		    d__3 + d__4 * d__4));
+
+/*           Determine if there are bulges to chase or if a new direct */
+/*           summand has been reached */
+
+/* Computing 2nd power */
+	    d__1 = b11e[i__ - 1];
+/* Computing 2nd power */
+	    d__2 = b11bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart11 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+/* Computing 2nd power */
+	    d__1 = b21e[i__ - 1];
+/* Computing 2nd power */
+	    d__2 = b21bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart21 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+/* Computing 2nd power */
+	    d__1 = b12d[i__ - 1];
+/* Computing 2nd power */
+	    d__2 = b12bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart12 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+/* Computing 2nd power */
+	    d__1 = b22d[i__ - 1];
+/* Computing 2nd power */
+	    d__2 = b22bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart22 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+
+/*           If possible, chase bulges from B11(I-1,I+1), B12(I-1,I), */
+/*           B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge- */
+/*           chasing by applying the original shift again. */
+
+	    if (! restart11 && ! restart21) {
+		dlartgp_(&x2, &x1, &work[iv1tsn + i__ - 1], &work[iv1tcs + 
+			i__ - 1], &r__);
+	    } else if (! restart11 && restart21) {
+		dlartgp_(&b11bulge, &b11e[i__ - 1], &work[iv1tsn + i__ - 1], &
+			work[iv1tcs + i__ - 1], &r__);
+	    } else if (restart11 && ! restart21) {
+		dlartgp_(&b21bulge, &b21e[i__ - 1], &work[iv1tsn + i__ - 1], &
+			work[iv1tcs + i__ - 1], &r__);
+	    } else if (mu <= nu) {
+		dlartgs_(&b11d[i__], &b11e[i__], &mu, &work[iv1tcs + i__ - 1],
+			 &work[iv1tsn + i__ - 1]);
+	    } else {
+		dlartgs_(&b21d[i__], &b21e[i__], &nu, &work[iv1tcs + i__ - 1],
+			 &work[iv1tsn + i__ - 1]);
+	    }
+	    work[iv1tcs + i__ - 1] = -work[iv1tcs + i__ - 1];
+	    work[iv1tsn + i__ - 1] = -work[iv1tsn + i__ - 1];
+	    if (! restart12 && ! restart22) {
+		dlartgp_(&y2, &y1, &work[iv2tsn + i__ - 2], &work[iv2tcs + 
+			i__ - 2], &r__);
+	    } else if (! restart12 && restart22) {
+		dlartgp_(&b12bulge, &b12d[i__ - 1], &work[iv2tsn + i__ - 2], &
+			work[iv2tcs + i__ - 2], &r__);
+	    } else if (restart12 && ! restart22) {
+		dlartgp_(&b22bulge, &b22d[i__ - 1], &work[iv2tsn + i__ - 2], &
+			work[iv2tcs + i__ - 2], &r__);
+	    } else if (nu < mu) {
+		dlartgs_(&b12e[i__ - 1], &b12d[i__], &nu, &work[iv2tcs + i__ 
+			- 2], &work[iv2tsn + i__ - 2]);
+	    } else {
+		dlartgs_(&b22e[i__ - 1], &b22d[i__], &mu, &work[iv2tcs + i__ 
+			- 2], &work[iv2tsn + i__ - 2]);
+	    }
+
+	    temp = work[iv1tcs + i__ - 1] * b11d[i__] + work[iv1tsn + i__ - 1]
+		     * b11e[i__];
+	    b11e[i__] = work[iv1tcs + i__ - 1] * b11e[i__] - work[iv1tsn + 
+		    i__ - 1] * b11d[i__];
+	    b11d[i__] = temp;
+	    b11bulge = work[iv1tsn + i__ - 1] * b11d[i__ + 1];
+	    b11d[i__ + 1] = work[iv1tcs + i__ - 1] * b11d[i__ + 1];
+	    temp = work[iv1tcs + i__ - 1] * b21d[i__] + work[iv1tsn + i__ - 1]
+		     * b21e[i__];
+	    b21e[i__] = work[iv1tcs + i__ - 1] * b21e[i__] - work[iv1tsn + 
+		    i__ - 1] * b21d[i__];
+	    b21d[i__] = temp;
+	    b21bulge = work[iv1tsn + i__ - 1] * b21d[i__ + 1];
+	    b21d[i__ + 1] = work[iv1tcs + i__ - 1] * b21d[i__ + 1];
+	    temp = work[iv2tcs + i__ - 2] * b12e[i__ - 1] + work[iv2tsn + i__ 
+		    - 2] * b12d[i__];
+	    b12d[i__] = work[iv2tcs + i__ - 2] * b12d[i__] - work[iv2tsn + 
+		    i__ - 2] * b12e[i__ - 1];
+	    b12e[i__ - 1] = temp;
+	    b12bulge = work[iv2tsn + i__ - 2] * b12e[i__];
+	    b12e[i__] = work[iv2tcs + i__ - 2] * b12e[i__];
+	    temp = work[iv2tcs + i__ - 2] * b22e[i__ - 1] + work[iv2tsn + i__ 
+		    - 2] * b22d[i__];
+	    b22d[i__] = work[iv2tcs + i__ - 2] * b22d[i__] - work[iv2tsn + 
+		    i__ - 2] * b22e[i__ - 1];
+	    b22e[i__ - 1] = temp;
+	    b22bulge = work[iv2tsn + i__ - 2] * b22e[i__];
+	    b22e[i__] = work[iv2tcs + i__ - 2] * b22e[i__];
+
+/*           Compute THETA(I) */
+
+	    x1 = cos(phi[i__ - 1]) * b11d[i__] + sin(phi[i__ - 1]) * b12e[i__ 
+		    - 1];
+	    x2 = cos(phi[i__ - 1]) * b11bulge + sin(phi[i__ - 1]) * b12bulge;
+	    y1 = cos(phi[i__ - 1]) * b21d[i__] + sin(phi[i__ - 1]) * b22e[i__ 
+		    - 1];
+	    y2 = cos(phi[i__ - 1]) * b21bulge + sin(phi[i__ - 1]) * b22bulge;
+
+/* Computing 2nd power */
+	    d__1 = y1;
+/* Computing 2nd power */
+	    d__2 = y2;
+/* Computing 2nd power */
+	    d__3 = x1;
+/* Computing 2nd power */
+	    d__4 = x2;
+	    theta[i__] = atan2(sqrt(d__1 * d__1 + d__2 * d__2), sqrt(d__3 * 
+		    d__3 + d__4 * d__4));
+
+/*           Determine if there are bulges to chase or if a new direct */
+/*           summand has been reached */
+
+/* Computing 2nd power */
+	    d__1 = b11d[i__];
+/* Computing 2nd power */
+	    d__2 = b11bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart11 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+/* Computing 2nd power */
+	    d__1 = b12e[i__ - 1];
+/* Computing 2nd power */
+	    d__2 = b12bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart12 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+/* Computing 2nd power */
+	    d__1 = b21d[i__];
+/* Computing 2nd power */
+	    d__2 = b21bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart21 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+/* Computing 2nd power */
+	    d__1 = b22e[i__ - 1];
+/* Computing 2nd power */
+	    d__2 = b22bulge;
+/* Computing 2nd power */
+	    d__3 = thresh;
+	    restart22 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+
+/*           If possible, chase bulges from B11(I+1,I), B12(I+1,I-1), */
+/*           B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge- */
+/*           chasing by applying the original shift again. */
+
+	    if (! restart11 && ! restart12) {
+		dlartgp_(&x2, &x1, &work[iu1sn + i__ - 1], &work[iu1cs + i__ 
+			- 1], &r__);
+	    } else if (! restart11 && restart12) {
+		dlartgp_(&b11bulge, &b11d[i__], &work[iu1sn + i__ - 1], &work[
+			iu1cs + i__ - 1], &r__);
+	    } else if (restart11 && ! restart12) {
+		dlartgp_(&b12bulge, &b12e[i__ - 1], &work[iu1sn + i__ - 1], &
+			work[iu1cs + i__ - 1], &r__);
+	    } else if (mu <= nu) {
+		dlartgs_(&b11e[i__], &b11d[i__ + 1], &mu, &work[iu1cs + i__ - 
+			1], &work[iu1sn + i__ - 1]);
+	    } else {
+		dlartgs_(&b12d[i__], &b12e[i__], &nu, &work[iu1cs + i__ - 1], 
+			&work[iu1sn + i__ - 1]);
+	    }
+	    if (! restart21 && ! restart22) {
+		dlartgp_(&y2, &y1, &work[iu2sn + i__ - 1], &work[iu2cs + i__ 
+			- 1], &r__);
+	    } else if (! restart21 && restart22) {
+		dlartgp_(&b21bulge, &b21d[i__], &work[iu2sn + i__ - 1], &work[
+			iu2cs + i__ - 1], &r__);
+	    } else if (restart21 && ! restart22) {
+		dlartgp_(&b22bulge, &b22e[i__ - 1], &work[iu2sn + i__ - 1], &
+			work[iu2cs + i__ - 1], &r__);
+	    } else if (nu < mu) {
+		dlartgs_(&b21e[i__], &b21d[i__ + 1], &nu, &work[iu2cs + i__ - 
+			1], &work[iu2sn + i__ - 1]);
+	    } else {
+		dlartgs_(&b22d[i__], &b22e[i__], &mu, &work[iu2cs + i__ - 1], 
+			&work[iu2sn + i__ - 1]);
+	    }
+	    work[iu2cs + i__ - 1] = -work[iu2cs + i__ - 1];
+	    work[iu2sn + i__ - 1] = -work[iu2sn + i__ - 1];
+
+	    temp = work[iu1cs + i__ - 1] * b11e[i__] + work[iu1sn + i__ - 1] *
+		     b11d[i__ + 1];
+	    b11d[i__ + 1] = work[iu1cs + i__ - 1] * b11d[i__ + 1] - work[
+		    iu1sn + i__ - 1] * b11e[i__];
+	    b11e[i__] = temp;
+	    if (i__ < imax - 1) {
+		b11bulge = work[iu1sn + i__ - 1] * b11e[i__ + 1];
+		b11e[i__ + 1] = work[iu1cs + i__ - 1] * b11e[i__ + 1];
+	    }
+	    temp = work[iu2cs + i__ - 1] * b21e[i__] + work[iu2sn + i__ - 1] *
+		     b21d[i__ + 1];
+	    b21d[i__ + 1] = work[iu2cs + i__ - 1] * b21d[i__ + 1] - work[
+		    iu2sn + i__ - 1] * b21e[i__];
+	    b21e[i__] = temp;
+	    if (i__ < imax - 1) {
+		b21bulge = work[iu2sn + i__ - 1] * b21e[i__ + 1];
+		b21e[i__ + 1] = work[iu2cs + i__ - 1] * b21e[i__ + 1];
+	    }
+	    temp = work[iu1cs + i__ - 1] * b12d[i__] + work[iu1sn + i__ - 1] *
+		     b12e[i__];
+	    b12e[i__] = work[iu1cs + i__ - 1] * b12e[i__] - work[iu1sn + i__ 
+		    - 1] * b12d[i__];
+	    b12d[i__] = temp;
+	    b12bulge = work[iu1sn + i__ - 1] * b12d[i__ + 1];
+	    b12d[i__ + 1] = work[iu1cs + i__ - 1] * b12d[i__ + 1];
+	    temp = work[iu2cs + i__ - 1] * b22d[i__] + work[iu2sn + i__ - 1] *
+		     b22e[i__];
+	    b22e[i__] = work[iu2cs + i__ - 1] * b22e[i__] - work[iu2sn + i__ 
+		    - 1] * b22d[i__];
+	    b22d[i__] = temp;
+	    b22bulge = work[iu2sn + i__ - 1] * b22d[i__ + 1];
+	    b22d[i__ + 1] = work[iu2cs + i__ - 1] * b22d[i__ + 1];
+
+	}
+
+/*        Compute PHI(IMAX-1) */
+
+	x1 = sin(theta[imax - 1]) * b11e[imax - 1] + cos(theta[imax - 1]) * 
+		b21e[imax - 1];
+	y1 = sin(theta[imax - 1]) * b12d[imax - 1] + cos(theta[imax - 1]) * 
+		b22d[imax - 1];
+	y2 = sin(theta[imax - 1]) * b12bulge + cos(theta[imax - 1]) * 
+		b22bulge;
+
+/* Computing 2nd power */
+	d__1 = y1;
+/* Computing 2nd power */
+	d__2 = y2;
+	phi[imax - 1] = atan2(fabs(x1), sqrt(d__1 * d__1 + d__2 * d__2));
+
+/*        Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX) */
+
+/* Computing 2nd power */
+	d__1 = b12d[imax - 1];
+/* Computing 2nd power */
+	d__2 = b12bulge;
+/* Computing 2nd power */
+	d__3 = thresh;
+	restart12 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+/* Computing 2nd power */
+	d__1 = b22d[imax - 1];
+/* Computing 2nd power */
+	d__2 = b22bulge;
+/* Computing 2nd power */
+	d__3 = thresh;
+	restart22 = d__1 * d__1 + d__2 * d__2 <= d__3 * d__3;
+
+	if (! restart12 && ! restart22) {
+	    dlartgp_(&y2, &y1, &work[iv2tsn + imax - 2], &work[iv2tcs + imax 
+		    - 2], &r__);
+	} else if (! restart12 && restart22) {
+	    dlartgp_(&b12bulge, &b12d[imax - 1], &work[iv2tsn + imax - 2], &
+		    work[iv2tcs + imax - 2], &r__);
+	} else if (restart12 && ! restart22) {
+	    dlartgp_(&b22bulge, &b22d[imax - 1], &work[iv2tsn + imax - 2], &
+		    work[iv2tcs + imax - 2], &r__);
+	} else if (nu < mu) {
+	    dlartgs_(&b12e[imax - 1], &b12d[imax], &nu, &work[iv2tcs + imax - 
+		    2], &work[iv2tsn + imax - 2]);
+	} else {
+	    dlartgs_(&b22e[imax - 1], &b22d[imax], &mu, &work[iv2tcs + imax - 
+		    2], &work[iv2tsn + imax - 2]);
+	}
+
+	temp = work[iv2tcs + imax - 2] * b12e[imax - 1] + work[iv2tsn + imax 
+		- 2] * b12d[imax];
+	b12d[imax] = work[iv2tcs + imax - 2] * b12d[imax] - work[iv2tsn + 
+		imax - 2] * b12e[imax - 1];
+	b12e[imax - 1] = temp;
+	temp = work[iv2tcs + imax - 2] * b22e[imax - 1] + work[iv2tsn + imax 
+		- 2] * b22d[imax];
+	b22d[imax] = work[iv2tcs + imax - 2] * b22d[imax] - work[iv2tsn + 
+		imax - 2] * b22e[imax - 1];
+	b22e[imax - 1] = temp;
+
+/*        Update singular vectors */
+
+	if (wantu1) {
+	    if (colmajor) {
+		i__1 = imax - imin + 1;
+		dlasr_("R", "V", "F", p, &i__1, &work[iu1cs + imin - 1], &
+			work[iu1sn + imin - 1], &u1[imin * u1_dim1 + 1], ldu1);
+	    } else {
+		i__1 = imax - imin + 1;
+		dlasr_("L", "V", "F", &i__1, p, &work[iu1cs + imin - 1], &
+			work[iu1sn + imin - 1], &u1[imin + u1_dim1], ldu1);
+	    }
+	}
+	if (wantu2) {
+	    if (colmajor) {
+		i__1 = *m - *p;
+		i__2 = imax - imin + 1;
+		dlasr_("R", "V", "F", &i__1, &i__2, &work[iu2cs + imin - 1], &
+			work[iu2sn + imin - 1], &u2[imin * u2_dim1 + 1], ldu2);
+	    } else {
+		i__1 = imax - imin + 1;
+		i__2 = *m - *p;
+		dlasr_("L", "V", "F", &i__1, &i__2, &work[iu2cs + imin - 1], &
+			work[iu2sn + imin - 1], &u2[imin + u2_dim1], ldu2);
+	    }
+	}
+	if (wantv1t) {
+	    if (colmajor) {
+		i__1 = imax - imin + 1;
+		dlasr_("L", "V", "F", &i__1, q, &work[iv1tcs + imin - 1], &
+			work[iv1tsn + imin - 1], &v1t[imin + v1t_dim1], ldv1t);
+	    } else {
+		i__1 = imax - imin + 1;
+		dlasr_("R", "V", "F", q, &i__1, &work[iv1tcs + imin - 1], &
+			work[iv1tsn + imin - 1], &v1t[imin * v1t_dim1 + 1], 
+			ldv1t);
+	    }
+	}
+	if (wantv2t) {
+	    if (colmajor) {
+		i__1 = imax - imin + 1;
+		i__2 = *m - *q;
+		dlasr_("L", "V", "F", &i__1, &i__2, &work[iv2tcs + imin - 1], 
+			&work[iv2tsn + imin - 1], &v2t[imin + v2t_dim1], 
+			ldv2t);
+	    } else {
+		i__1 = *m - *q;
+		i__2 = imax - imin + 1;
+		dlasr_("R", "V", "F", &i__1, &i__2, &work[iv2tcs + imin - 1], 
+			&work[iv2tsn + imin - 1], &v2t[imin * v2t_dim1 + 1], 
+			ldv2t);
+	    }
+	}
+
+/*        Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX) */
+
+	if (b11e[imax - 1] + b21e[imax - 1] > 0.) {
+	    b11d[imax] = -b11d[imax];
+	    b21d[imax] = -b21d[imax];
+	    if (wantv1t) {
+		if (colmajor) {
+		    dscal_(q, &c_b35, &v1t[imax + v1t_dim1], ldv1t);
+		} else {
+		    dscal_(q, &c_b35, &v1t[imax * v1t_dim1 + 1], &c__1);
+		}
+	    }
+	}
+
+/*        Compute THETA(IMAX) */
+
+	x1 = cos(phi[imax - 1]) * b11d[imax] + sin(phi[imax - 1]) * b12e[imax 
+		- 1];
+	y1 = cos(phi[imax - 1]) * b21d[imax] + sin(phi[imax - 1]) * b22e[imax 
+		- 1];
+
+	theta[imax] = atan2(fabs(y1), fabs(x1));
+
+/*        Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX), */
+/*        and B22(IMAX,IMAX-1) */
+
+	if (b11d[imax] + b12e[imax - 1] < 0.) {
+	    b12d[imax] = -b12d[imax];
+	    if (wantu1) {
+		if (colmajor) {
+		    dscal_(p, &c_b35, &u1[imax * u1_dim1 + 1], &c__1);
+		} else {
+		    dscal_(p, &c_b35, &u1[imax + u1_dim1], ldu1);
+		}
+	    }
+	}
+	if (b21d[imax] + b22e[imax - 1] > 0.) {
+	    b22d[imax] = -b22d[imax];
+	    if (wantu2) {
+		if (colmajor) {
+		    i__1 = *m - *p;
+		    dscal_(&i__1, &c_b35, &u2[imax * u2_dim1 + 1], &c__1);
+		} else {
+		    i__1 = *m - *p;
+		    dscal_(&i__1, &c_b35, &u2[imax + u2_dim1], ldu2);
+		}
+	    }
+	}
+
+/*        Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX) */
+
+	if (b12d[imax] + b22d[imax] < 0.) {
+	    if (wantv2t) {
+		if (colmajor) {
+		    i__1 = *m - *q;
+		    dscal_(&i__1, &c_b35, &v2t[imax + v2t_dim1], ldv2t);
+		} else {
+		    i__1 = *m - *q;
+		    dscal_(&i__1, &c_b35, &v2t[imax * v2t_dim1 + 1], &c__1);
+		}
+	    }
+	}
+
+/*        Test for negligible sines or cosines */
+
+	i__1 = imax;
+	for (i__ = imin; i__ <= i__1; ++i__) {
+	    if (theta[i__] < thresh) {
+		theta[i__] = 0.;
+	    } else if (theta[i__] > 1.57079632679489662 - thresh) {
+		theta[i__] = 1.57079632679489662;
+	    }
+	}
+	i__1 = imax - 1;
+	for (i__ = imin; i__ <= i__1; ++i__) {
+	    if (phi[i__] < thresh) {
+		phi[i__] = 0.;
+	    } else if (phi[i__] > 1.57079632679489662 - thresh) {
+		phi[i__] = 1.57079632679489662;
+	    }
+	}
+
+/*        Deflate */
+
+	if (imax > 1) {
+	    while(phi[imax - 1] == 0.) {
+		--imax;
+		if (imax <= 1) {
+		    break;
+		}
+	    }
+	}
+	if (imin > imax - 1) {
+	    imin = imax - 1;
+	}
+	if (imin > 1) {
+	    while(phi[imin - 1] != 0.) {
+		--imin;
+		if (imin <= 1) {
+		    break;
+		}
+	    }
+	}
+
+/*        Repeat main iteration loop */
+
+    }
+
+/*     Postprocessing: order THETA from least to greatest */
+
+    i__1 = *q;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+	mini = i__;
+	thetamin = theta[i__];
+	i__2 = *q;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    if (theta[j] < thetamin) {
+		mini = j;
+		thetamin = theta[j];
+	    }
+	}
+
+	if (mini != i__) {
+	    theta[mini] = theta[i__];
+	    theta[i__] = thetamin;
+	    if (colmajor) {
+		if (wantu1) {
+		    dswap_(p, &u1[i__ * u1_dim1 + 1], &c__1, &u1[mini * 
+			    u1_dim1 + 1], &c__1);
+		}
+		if (wantu2) {
+		    i__2 = *m - *p;
+		    dswap_(&i__2, &u2[i__ * u2_dim1 + 1], &c__1, &u2[mini * 
+			    u2_dim1 + 1], &c__1);
+		}
+		if (wantv1t) {
+		    dswap_(q, &v1t[i__ + v1t_dim1], ldv1t, &v1t[mini + 
+			    v1t_dim1], ldv1t);
+		}
+		if (wantv2t) {
+		    i__2 = *m - *q;
+		    dswap_(&i__2, &v2t[i__ + v2t_dim1], ldv2t, &v2t[mini + 
+			    v2t_dim1], ldv2t);
+		}
+	    } else {
+		if (wantu1) {
+		    dswap_(p, &u1[i__ + u1_dim1], ldu1, &u1[mini + u1_dim1], 
+			    ldu1);
+		}
+		if (wantu2) {
+		    i__2 = *m - *p;
+		    dswap_(&i__2, &u2[i__ + u2_dim1], ldu2, &u2[mini + 
+			    u2_dim1], ldu2);
+		}
+		if (wantv1t) {
+		    dswap_(q, &v1t[i__ * v1t_dim1 + 1], &c__1, &v1t[mini * 
+			    v1t_dim1 + 1], &c__1);
+		}
+		if (wantv2t) {
+		    i__2 = *m - *q;
+		    dswap_(&i__2, &v2t[i__ * v2t_dim1 + 1], &c__1, &v2t[mini *
+			     v2t_dim1 + 1], &c__1);
+		}
+	    }
+	}
+
+    }
+
+    return 0;
+
+/*     End of DBBCSD */
+
+} /* dbbcsd_ */
+
diff --git a/lapack-netlib/SRC/dbdsdc.c b/lapack-netlib/SRC/dbdsdc.c
new file mode 100644
index 000000000..f94b56467
--- /dev/null
+++ b/lapack-netlib/SRC/dbdsdc.c
@@ -0,0 +1,969 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single-precision (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision (real, imaginary) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed as C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed as C99 complex */
+#define pCf(z) (*_pCf(z))	/* assignable C99-complex view of *z */
+#define pCd(z) (*_pCd(z))	/* assignable C99-complex view of *z */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: parameter block for an external read or write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: parameter block for an internal read or write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: parameter block for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: parameter block for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: parameter block shared by rewind, backspace and endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: parameter block for inquire; most string members are followed by their length */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/* the members from here on follow the standard's parameter order */
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* overlays one value of each scalar type; for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; (disabled: no longint type is declared in this header) */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* one variable entry of a Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus nvars pointers to Vardesc entries */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* works for any arithmetic type; evaluates x twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)	/* value (0 or 1) of bit b of a */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))	/* a with bit b cleared */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))	/* a with bit b set */
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }	/* sig_die() below ignores its arguments and calls exit(1) */
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf(R): pCf() takes a complex pointer and yields an assignable C99 complex. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* a - b when a > b, otherwise 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* log10(x) computed as log10(e) * ln(x) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* log10(x) computed as log10(e) * ln(x) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| with the sign of b; b >= 0 gives +|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* the length is passed explicitly; s is ignored */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))	/* integer exponents go through the *pow_ui helpers defined below */
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* *R = (*A)**(*B); all three are doublecomplex pointers, converted via Cd()/pCd() */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }	/* copies the np pieces rpp[i] (lengths rnp[i]) into lpp, then blank-fills up to llp characters */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* NOTE: only the first min(c,d) characters are compared */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) characters and stops at a NUL in B; A is not blank-padded */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* leaves the innermost enclosing loop (or switch) */
+#define mycycle() continue;	/* starts the next iteration of the innermost enclosing loop */
+/* The my*() helpers expand to parenthesized expressions so they can be used wherever a value is expected. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++: pointer to a function returning logical, any arguments */
+#else
+typedef logical (*L_fp)();	/* C: pointer to a function returning logical, parameters unspecified */
+#endif
+
+/* Return x**n by repeated squaring; a negative n is applied to 1/x. */
+static float spow_ui(float x, integer n) {
+	float pow = 1.0;
+	/* Take |n| in unsigned arithmetic: -n overflows (undefined) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) pow *= x;	/* this bit of the exponent is set */
+		if (u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return pow;
+}
+/* Return x**n by repeated squaring; a negative n is applied to 1/x. */
+static double dpow_ui(double x, integer n) {
+	double pow = 1.0;
+	/* Take |n| in unsigned arithmetic: -n overflows (undefined) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) pow *= x;	/* this bit of the exponent is set */
+		if (u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return pow;
+}
+/* Return x**n for complex x by repeated squaring; a negative n is applied to 1/x. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow = 1.0;
+	/* Take |n| in unsigned arithmetic: -n overflows (undefined) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) pow *= x;	/* this bit of the exponent is set */
+		if (u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return pow;
+}
+/* Return x**n for complex x by repeated squaring; a negative n is applied to 1/x. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow = 1.0;
+	/* Take |n| in unsigned arithmetic: -n overflows (undefined) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) pow *= x;	/* this bit of the exponent is set */
+		if (u >>= 1) x *= x;	/* square only while higher bits remain */
+	}
+	return pow;
+}
+/* Integer power x**n; for a negative n the reciprocal power is truncated to an integer. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int bits;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0) {
+		/* With x != 1, only x == -1 keeps a nonzero value; x == 0 still divides by zero. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;
+	}
+	for (bits = n; ; ) {
+		if (bits & 01) result *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* *z = sum over i of conj(x(i)) * y(i) for n elements taken with strides *incx_ and *incy_. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at the far end so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over i of conj(x(i)) * y(i) for n elements taken with strides *incx_ and *incy_. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at the far end so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* *z = sum over i of x(i) * y(i) (no conjugation) for n elements with strides *incx_ and *incy_. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at the far end so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over i of x(i) * y(i) (no conjugation) for n elements with strides *incx_ and *incy_. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at the far end so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__9 = 9;	/* first argument of the ilaenv_ query that yields smlsiz */
+static integer c__0 = 0;	/* zero count/size argument for ilaenv_, dlasdq_ and dlascl_ */
+static doublereal c_b15 = 1.;	/* 1.0, passed by address to d_sign, dlaset_ and dlascl_ */
+static integer c__1 = 1;	/* unit increment / single column */
+static doublereal c_b29 = 0.;	/* 0.0, passed by address to dlaset_ */
+
+/* > \brief \b DBDSDC */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DBDSDC + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsdc.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsdc.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsdc.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, */
+/*                          WORK, IWORK, INFO ) */
+
+/*       CHARACTER          COMPQ, UPLO */
+/*       INTEGER            INFO, LDU, LDVT, N */
+/*       INTEGER            IQ( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   D( * ), E( * ), Q( * ), U( LDU, * ), */
+/*      $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DBDSDC computes the singular value decomposition (SVD) of a real */
+/* > N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT, */
+/* > using a divide and conquer method, where S is a diagonal matrix */
+/* > with non-negative diagonal elements (the singular values of B), and */
+/* > U and VT are orthogonal matrices of left and right singular vectors, */
+/* > respectively. DBDSDC can be used to compute all singular values, */
+/* > and optionally, singular vectors or singular vectors in compact form. */
+/* > */
+/* > This code makes very mild assumptions about floating point */
+/* > arithmetic. It will work on machines with a guard digit in */
+/* > add/subtract, or on those binary machines without guard digits */
+/* > which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
+/* > It could conceivably fail on hexadecimal or decimal machines */
+/* > without guard digits, but we know of none.  See DLASD3 for details. */
+/* > */
+/* > The code currently calls DLASDQ if singular values only are desired. */
+/* > However, it can be slightly modified to compute singular values */
+/* > using the divide and conquer method. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] UPLO */
+/* > \verbatim */
+/* >          UPLO is CHARACTER*1 */
+/* >          = 'U':  B is upper bidiagonal. */
+/* >          = 'L':  B is lower bidiagonal. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] COMPQ */
+/* > \verbatim */
+/* >          COMPQ is CHARACTER*1 */
+/* >          Specifies whether singular vectors are to be computed */
+/* >          as follows: */
+/* >          = 'N':  Compute singular values only; */
+/* >          = 'P':  Compute singular values and compute singular */
+/* >                  vectors in compact form; */
+/* >          = 'I':  Compute singular values and singular vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is DOUBLE PRECISION array, dimension (N) */
+/* >          On entry, the n diagonal elements of the bidiagonal matrix B. */
+/* >          On exit, if INFO=0, the singular values of B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] E */
+/* > \verbatim */
+/* >          E is DOUBLE PRECISION array, dimension (N-1) */
+/* >          On entry, the elements of E contain the offdiagonal */
+/* >          elements of the bidiagonal matrix whose SVD is desired. */
+/* >          On exit, E has been destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is DOUBLE PRECISION array, dimension (LDU,N) */
+/* >          If  COMPQ = 'I', then: */
+/* >             On exit, if INFO = 0, U contains the left singular vectors */
+/* >             of the bidiagonal matrix. */
+/* >          For other values of COMPQ, U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= 1. */
+/* >          If singular vectors are desired, then LDU >= max( 1, N ). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VT */
+/* > \verbatim */
+/* >          VT is DOUBLE PRECISION array, dimension (LDVT,N) */
+/* >          If  COMPQ = 'I', then: */
+/* >             On exit, if INFO = 0, VT**T contains the right singular */
+/* >             vectors of the bidiagonal matrix. */
+/* >          For other values of COMPQ, VT is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT.  LDVT >= 1. */
+/* >          If singular vectors are desired, then LDVT >= max( 1, N ). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] Q */
+/* > \verbatim */
+/* >          Q is DOUBLE PRECISION array, dimension (LDQ) */
+/* >          If  COMPQ = 'P', then: */
+/* >             On exit, if INFO = 0, Q and IQ contain the left */
+/* >             and right singular vectors in a compact form, */
+/* >             requiring O(N log N) space instead of 2*N**2. */
+/* >             In particular, Q contains all the DOUBLE PRECISION data in */
+/* >             LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) */
+/* >             words of memory, where SMLSIZ is returned by ILAENV and */
+/* >             is equal to the maximum size of the subproblems at the */
+/* >             bottom of the computation tree (usually about 25). */
+/* >          For other values of COMPQ, Q is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IQ */
+/* > \verbatim */
+/* >          IQ is INTEGER array, dimension (LDIQ) */
+/* >          If  COMPQ = 'P', then: */
+/* >             On exit, if INFO = 0, Q and IQ contain the left */
+/* >             and right singular vectors in a compact form, */
+/* >             requiring O(N log N) space instead of 2*N**2. */
+/* >             In particular, IQ contains all INTEGER data in */
+/* >             LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) */
+/* >             words of memory, where SMLSIZ is returned by ILAENV and */
+/* >             is equal to the maximum size of the subproblems at the */
+/* >             bottom of the computation tree (usually about 25). */
+/* >          For other values of COMPQ, IQ is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          If COMPQ = 'N' then LWORK >= (4 * N). */
+/* >          If COMPQ = 'P' then LWORK >= (6 * N). */
+/* >          If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (8*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  The algorithm failed to compute a singular value. */
+/* >                The update process of divide and conquer failed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup auxOTHERcomputational */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Ming Gu and Huan Ren, Computer Science Division, University of */
+/* >     California at Berkeley, USA */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal *
+	d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, 
+	integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer *
+	iwork, integer *info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Local variables */
+    integer difl, difr, ierr, perm, mlvl, sqre, i__, j, k;
+    doublereal p, r__;
+    integer z__;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, 
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *
+	    , doublereal *, integer *), dswap_(integer *, doublereal *, 
+	    integer *, doublereal *, integer *);
+    integer poles, iuplo, nsize, start;
+    extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    integer *, integer *, doublereal *, integer *);
+    integer ic, ii, kk;
+    doublereal cs;
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *);
+    integer is, iu;
+    doublereal sn;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *), dlasdq_(char *, integer *, integer 
+	    *, integer *, integer *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *), dlaset_(char *, 
+	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
+	    integer *), dlartg_(doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    integer givcol;
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    integer icompq;
+    doublereal orgnrm;
+    integer givnum, givptr, nm1, qstart, smlsiz, wstart, smlszp;
+    doublereal eps;
+    integer ivt;
+
+/*  DBDSDC: SVD of a real N-by-N bidiagonal matrix (diagonal D, off-diagonal E); always returns 0, status is in *info. */
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+/*  Changed dimension statement in comment describing E from (N) to */
+/*  (N-1).  Sven, 17 Feb 05. */
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments: shift every array pointer so the code below can index from 1 */
+    --d__;
+    --e;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    --q;
+    --iq;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+/*     Decode UPLO (1 = 'U', 2 = 'L', 0 = invalid) and COMPQ (0 = 'N', 1 = 'P', 2 = 'I', -1 = invalid). */
+    iuplo = 0;
+    if (lsame_(uplo, "U")) {
+	iuplo = 1;
+    }
+    if (lsame_(uplo, "L")) {
+	iuplo = 2;
+    }
+    if (lsame_(compq, "N")) {
+	icompq = 0;
+    } else if (lsame_(compq, "P")) {
+	icompq = 1;
+    } else if (lsame_(compq, "I")) {
+	icompq = 2;
+    } else {
+	icompq = -1;
+    }
+    if (iuplo == 0) {
+	*info = -1;
+    } else if (icompq < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ldu < 1 || icompq == 2 && *ldu < *n) {
+	*info = -7;
+    } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DBDSDC", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+    if (*n == 1) {	/* 1-by-1 case: D(1) becomes |D(1)|; the vector entries are sign(D(1)) and 1 */
+	if (icompq == 1) {
+	    q[1] = d_sign(&c_b15, &d__[1]);
+	    q[smlsiz * *n + 1] = 1.;
+	} else if (icompq == 2) {
+	    u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]);
+	    vt[vt_dim1 + 1] = 1.;
+	}
+	d__[1] = abs(d__[1]);
+	return 0;
+    }
+    nm1 = *n - 1;
+
+/*     If matrix lower bidiagonal, rotate to be upper bidiagonal */
+/*     by applying Givens rotations on the left */
+/*     (CS and SN are kept in Q when ICOMPQ = 1; CS and -SN are kept in WORK when ICOMPQ = 2) */
+    wstart = 1;
+    qstart = 3;
+    if (icompq == 1) {
+	dcopy_(n, &d__[1], &c__1, &q[1], &c__1);
+	i__1 = *n - 1;
+	dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1);
+    }
+    if (iuplo == 2) {
+	qstart = 5;
+	if (icompq == 2) {
+	    wstart = (*n << 1) - 1;
+	}
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (icompq == 1) {
+		q[i__ + (*n << 1)] = cs;
+		q[i__ + *n * 3] = sn;
+	    } else if (icompq == 2) {
+		work[i__] = cs;
+		work[nm1 + i__] = -sn;
+	    }
+/* L10: */
+	}
+    }
+
+/*     If ICOMPQ = 0, use DLASDQ to compute the singular values. */
+
+    if (icompq == 0) {
+/*        Ignore WSTART, instead using WORK( 1 ), since the two vectors */
+/*        for CS and -SN above are added only if ICOMPQ == 2, */
+/*        and adding them exceeds documented WORK size of 4*n. */
+	dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
+		vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
+		1], info);
+	goto L40;	/* skips the scaling and divide-and-conquer code; continues at the sort */
+    }
+
+/*     If N is smaller than the minimum divide size SMLSIZ, then solve */
+/*     the problem with another solver. */
+
+    if (*n <= smlsiz) {
+	if (icompq == 2) {
+	    dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu);
+	    dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt);
+	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
+		    , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
+		    wstart], info);
+	} else if (icompq == 1) {
+	    iu = 1;
+	    ivt = iu + *n;
+	    dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n);
+	    dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n);
+	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (
+		    qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[
+		    iu + (qstart - 1) * *n], n, &work[wstart], info);
+	}
+	goto L40;	/* skips the scaling and divide-and-conquer code; continues at the sort */
+    }
+/*     ICOMPQ = 2: initialize U and VT through DLASET (called with 0 and 1) before divide and conquer. */
+    if (icompq == 2) {
+	dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu);
+	dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt);
+    }
+
+/*     Scale. */
+
+    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.) {
+	return 0;
+    }
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr);
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, &
+	    ierr);
+
+    eps = dlamch_("Epsilon") * .9;
+/*     MLVL = INT( LOG( N / (SMLSIZ+1) ) / LOG( 2 ) ) + 1 */
+    mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / 
+	    log(2.)) + 1;
+    smlszp = smlsiz + 1;
+/*     ICOMPQ = 1: offsets that place each DLASDA output inside Q and IQ. */
+    if (icompq == 1) {
+	iu = 1;
+	ivt = smlsiz + 1;
+	difl = ivt + smlszp;
+	difr = difl + mlvl;
+	z__ = difr + (mlvl << 1);
+	ic = z__ + mlvl;
+	is = ic + 1;
+	poles = is + 1;
+	givnum = poles + (mlvl << 1);
+
+	k = 1;
+	givptr = 2;
+	perm = 3;
+	givcol = perm + mlvl;
+    }
+/*     Replace any D(I) with |D(I)| < EPS by EPS carrying the sign of D(I). */
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = d__[i__], abs(d__1)) < eps) {
+	    d__[i__] = d_sign(&eps, &d__[i__]);
+	}
+/* L20: */
+    }
+/*     Scan E: an entry with |E(I)| < EPS, or I = N-1, closes the subproblem that begins at START. */
+    start = 1;
+    sqre = 0;
+
+    i__1 = nm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) {
+
+/*           Subproblem found. First determine its size and then */
+/*           apply divide and conquer on it. */
+
+	    if (i__ < nm1) {
+
+/*              A subproblem with E(I) small for I < NM1. */
+
+		nsize = i__ - start + 1;
+	    } else if ((d__1 = e[i__], abs(d__1)) >= eps) {
+
+/*              A subproblem with E(NM1) not too small but I = NM1. */
+
+		nsize = *n - start + 1;
+	    } else {
+
+/*              A subproblem with E(NM1) small. This implies an */
+/*              1-by-1 subproblem at D(N). Solve this 1-by-1 problem */
+/*              first. */
+
+		nsize = i__ - start + 1;
+		if (icompq == 2) {
+		    u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]);
+		    vt[*n + *n * vt_dim1] = 1.;
+		} else if (icompq == 1) {
+		    q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]);
+		    q[*n + (smlsiz + qstart - 1) * *n] = 1.;
+		}
+		d__[*n] = (d__1 = d__[*n], abs(d__1));
+	    }
+	    if (icompq == 2) {
+		dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + 
+			start * u_dim1], ldu, &vt[start + start * vt_dim1], 
+			ldvt, &smlsiz, &iwork[1], &work[wstart], info);
+	    } else {
+		dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[
+			start], &q[start + (iu + qstart - 2) * *n], n, &q[
+			start + (ivt + qstart - 2) * *n], &iq[start + k * *n],
+			 &q[start + (difl + qstart - 2) * *n], &q[start + (
+			difr + qstart - 2) * *n], &q[start + (z__ + qstart - 
+			2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[
+			start + givptr * *n], &iq[start + givcol * *n], n, &
+			iq[start + perm * *n], &q[start + (givnum + qstart - 
+			2) * *n], &q[start + (ic + qstart - 2) * *n], &q[
+			start + (is + qstart - 2) * *n], &work[wstart], &
+			iwork[1], info);
+	    }
+	    if (*info != 0) {
+		return 0;
+	    }
+	    start = i__ + 1;
+	}
+/* L30: */
+    }
+
+/*     Unscale */
+
+    dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr);
+L40:
+
+/*     Use Selection Sort to minimize swaps of singular vectors */
+/*     (largest value first; when ICOMPQ = 1 the index swapped with position I is stored in IQ(I)) */
+    i__1 = *n;
+    for (ii = 2; ii <= i__1; ++ii) {
+	i__ = ii - 1;
+	kk = i__;
+	p = d__[i__];
+	i__2 = *n;
+	for (j = ii; j <= i__2; ++j) {
+	    if (d__[j] > p) {
+		kk = j;
+		p = d__[j];
+	    }
+/* L50: */
+	}
+	if (kk != i__) {
+	    d__[kk] = d__[i__];
+	    d__[i__] = p;
+	    if (icompq == 1) {
+		iq[i__] = kk;
+	    } else if (icompq == 2) {
+		dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], &
+			c__1);
+		dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt);
+	    }
+	} else if (icompq == 1) {
+	    iq[i__] = i__;
+	}
+/* L60: */
+    }
+
+/*     If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */
+
+    if (icompq == 1) {
+	if (iuplo == 1) {
+	    iq[*n] = 1;
+	} else {
+	    iq[*n] = 0;
+	}
+    }
+
+/*     If B is lower bidiagonal, update U by those Givens rotations */
+/*     which rotated B to be upper bidiagonal */
+
+    if (iuplo == 2 && icompq == 2) {
+	dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu);
+    }
+
+    return 0;
+
+/*     End of DBDSDC */
+
+} /* dbdsdc_ */
+
diff --git a/lapack-netlib/SRC/dbdsqr.c b/lapack-netlib/SRC/dbdsqr.c
new file mode 100644
index 000000000..d8d2f5b3f
--- /dev/null
+++ b/lapack-netlib/SRC/dbdsqr.c
@@ -0,0 +1,1397 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* Fortran-style integer; a plain C int in this header */
+typedef unsigned int uinteger;	/* unsigned counterpart, used by the bit_clear/bit_set macros */
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single-precision complex: r = real part, i = imaginary part */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex: r = real part, i = imaginary part */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast; assumes struct and _Complex float share layout */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast; assumes struct and _Complex double share layout */
+#define pCf(z) (*_pCf(z))	/* lvalue view of *z as _Complex float, used as an assignment target */
+#define pCd(z) (*_pCd(z))	/* lvalue view of *z as _Complex double, used as an assignment target */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it falls back to plain extern unless already defined */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* Scalar types used by the I/O descriptor structs and by string-length arguments */
+
+typedef int flag;
+typedef int ftnlen;	/* e.g. the (ftnlen)6 passed together with "DBDSQR" to xerbla_ */
+typedef int ftnint;
+
+/* cilist: parameters for an external read/write (layout only; not referenced in the code reviewed here) */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: parameters for an internal read/write; iciunit is a char pointer where cilist has a unit number */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: parameters for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;	/* NOTE(review): presumably the length of ofnm - confirm against libf2c */
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: parameters for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: parameters shared by rewind, backspace and endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: parameters for inquire; most char* members are followed by a matching length member */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* declared ftnint, unlike the other *len members; both typedefs are int */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* one member per scalar type; f2c note: for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j;  -- left disabled; this header declares no longint typedef */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* one variable entry; Namelist.vars points at these */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;	/* array of Vardesc pointers; presumably nvars entries - confirm */
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* type-generic; hides the abs declared by <stdlib.h>; x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)	/* value of bit b of a */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))	/* a with bit b cleared; a itself is not modified */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))	/* a with bit b set; a itself is not modified */
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }	/* sig_die below ignores its arguments and calls exit(1) */
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* uses the double-precision ccos; the result is narrowed on assignment */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf(R), a _Complex float view of the struct, rather than assigning to *(R) directly. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))	/* the d_* macros take pointers to their operands */
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* goes through the double-precision conj; narrowed on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference: a - b if a > b, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* the constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* the constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves rounded away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| if b >= 0, else -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))	/* expands to the abs macro defined in this header */
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* the length argument is the result; s is ignored */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))	/* dbdsqr_ uses this to form eps**(-1/8) */
+#define pow_si(B,E) spow_ui(*(B),*(E))	/* the pow_?i macros dereference both operands and call the static helpers below */
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* *R = A**B; A and B are doublecomplex pointers, so both go through Cd() */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars, stops at a NUL in B, writes no padding */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* expands to a bare break statement, so it only works inside a loop or switch */
+#define mycycle() continue;	/* expands to a bare continue statement, so it only works inside a loop */
+#define myceiling(w) (ceil(w))	/* parenthesised (was a braced fragment) so it works in any expression, not only an initializer */
+#define myhuge(w) (HUGE_VAL)	/* the argument is ignored; always HUGE_VAL */
+/* mymaxloc forwards to dmaxloc_ (double data only); s and e are pointers to the 1-based bounds, n is passed through. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* pointer to a function returning logical; C++ spells the open argument list as (...) */
+#else
+typedef logical (*L_fp)();	/* pointer to a function returning logical; argument list left unspecified in C */
+#endif
+
+/* Single-precision x**n by repeated squaring; a negative n yields (1/x)**|n|, and n == 0 yields 1. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* the last squaring is skipped: its value would never be used */
+	}
+	return result;
+}
+/* Double-precision x**n by repeated squaring; a negative n yields (1/x)**|n|, and n == 0 yields 1. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* the last squaring is skipped: its value would never be used */
+	}
+	return result;
+}
+/* Single-precision complex x**n by repeated squaring; a negative n yields (1/x)**|n|, and n == 0 yields 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* the last squaring is skipped: its value would never be used */
+	}
+	return result;
+}
+/* Double-precision complex x**n by repeated squaring; a negative n yields (1/x)**|n|, and n == 0 yields 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* the last squaring is skipped: its value would never be used */
+	}
+	return result;
+}
+/* Integer power x**n; for n < 0 the result is the truncated integer reciprocal (1, -1 or 0). */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int u;
+	if (n < 0) {
+		if (x == 1) return 1;
+		/* Bases other than +1/-1 truncate to 0; x == 0 keeps the original's deliberate 1/x division. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* (-1)**n depends only on the parity of n, so -n (undefined for the most negative integer) is never formed. */
+		return (n % 2 != 0) ? -1 : 1;
+	}
+	/* Repeated squaring for n >= 0; n == 0 leaves result at 1. */
+	for (u = n; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* the last squaring is skipped: its value would never be used */
+	}
+	return result;
+}
+/* Position (1 = element s) of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k; double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+/* Position (1 = element s) of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k; float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+/* Conjugated dot product, single complex: *z = sum over k of conj(x_k) * y_k for n strided elements. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference-BLAS convention: with a negative increment the traversal starts */
+	/* at offset (1-n)*inc, so every index stays inside the caller's vector.     */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	/* pCf writes the result through a _Complex float view of *z. */
+	pCf(z) = zdotc;
+}
+/* Conjugated dot product, double complex: *z = sum over k of conj(x_k) * y_k for n strided elements. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference-BLAS convention: with a negative increment the traversal starts */
+	/* at offset (1-n)*inc, so every index stays inside the caller's vector.     */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	/* pCd writes the result through a _Complex double view of *z. */
+	pCd(z) = zdotc;
+}
+/* Unconjugated dot product, single complex: *z = sum over k of x_k * y_k for n strided elements. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference-BLAS convention: with a negative increment the traversal starts */
+	/* at offset (1-n)*inc, so every index stays inside the caller's vector.     */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	/* pCf writes the result through a _Complex float view of *z. */
+	pCf(z) = zdotu;
+}
+/* Unconjugated dot product, double complex: *z = sum over k of x_k * y_k for n strided elements. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference-BLAS convention: with a negative increment the traversal starts */
+	/* at offset (1-n)*inc, so every index stays inside the caller's vector.     */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	/* pCd writes the result through a _Complex double view of *z. */
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static doublereal c_b15 = -.125;	/* exponent -1/8 handed to pow_dd when dbdsqr_ forms TOLMUL */
+static integer c__1 = 1;	/* unit increment, e.g. passed to drot_ when rotating columns of U */
+static doublereal c_b49 = 1.;	/* NOTE(review): not referenced in the part of dbdsqr_ reviewed here */
+static doublereal c_b72 = -1.;	/* NOTE(review): not referenced in the part of dbdsqr_ reviewed here */
+
+/* > \brief \b DBDSQR */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DBDSQR + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsqr.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsqr.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsqr.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, */
+/*                          LDU, C, LDC, WORK, INFO ) */
+
+/*       CHARACTER          UPLO */
+/*       INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU */
+/*       DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ), */
+/*      $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DBDSQR computes the singular values and, optionally, the right and/or */
+/* > left singular vectors from the singular value decomposition (SVD) of */
+/* > a real N-by-N (upper or lower) bidiagonal matrix B using the implicit */
+/* > zero-shift QR algorithm.  The SVD of B has the form */
+/* > */
+/* >    B = Q * S * P**T */
+/* > */
+/* > where S is the diagonal matrix of singular values, Q is an orthogonal */
+/* > matrix of left singular vectors, and P is an orthogonal matrix of */
+/* > right singular vectors.  If left singular vectors are requested, this */
+/* > subroutine actually returns U*Q instead of Q, and, if right singular */
+/* > vectors are requested, this subroutine returns P**T*VT instead of */
+/* > P**T, for given real input matrices U and VT.  When U and VT are the */
+/* > orthogonal matrices that reduce a general matrix A to bidiagonal */
+/* > form:  A = U*B*VT, as computed by DGEBRD, then */
+/* > */
+/* >    A = (U*Q) * S * (P**T*VT) */
+/* > */
+/* > is the SVD of A.  Optionally, the subroutine may also compute Q**T*C */
+/* > for a given real input matrix C. */
+/* > */
+/* > See "Computing  Small Singular Values of Bidiagonal Matrices With */
+/* > Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */
+/* > LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */
+/* > no. 5, pp. 873-912, Sept 1990) and */
+/* > "Accurate singular values and differential qd algorithms," by */
+/* > B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */
+/* > Department, University of California at Berkeley, July 1992 */
+/* > for a detailed description of the algorithm. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] UPLO */
+/* > \verbatim */
+/* >          UPLO is CHARACTER*1 */
+/* >          = 'U':  B is upper bidiagonal; */
+/* >          = 'L':  B is lower bidiagonal. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix B.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NCVT */
+/* > \verbatim */
+/* >          NCVT is INTEGER */
+/* >          The number of columns of the matrix VT. NCVT >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRU */
+/* > \verbatim */
+/* >          NRU is INTEGER */
+/* >          The number of rows of the matrix U. NRU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NCC */
+/* > \verbatim */
+/* >          NCC is INTEGER */
+/* >          The number of columns of the matrix C. NCC >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] D */
+/* > \verbatim */
+/* >          D is DOUBLE PRECISION array, dimension (N) */
+/* >          On entry, the n diagonal elements of the bidiagonal matrix B. */
+/* >          On exit, if INFO=0, the singular values of B in decreasing */
+/* >          order. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] E */
+/* > \verbatim */
+/* >          E is DOUBLE PRECISION array, dimension (N-1) */
+/* >          On entry, the N-1 offdiagonal elements of the bidiagonal */
+/* >          matrix B. */
+/* >          On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */
+/* >          will contain the diagonal and superdiagonal elements of a */
+/* >          bidiagonal matrix orthogonally equivalent to the one given */
+/* >          as input. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] VT */
+/* > \verbatim */
+/* >          VT is DOUBLE PRECISION array, dimension (LDVT, NCVT) */
+/* >          On entry, an N-by-NCVT matrix VT. */
+/* >          On exit, VT is overwritten by P**T * VT. */
+/* >          Not referenced if NCVT = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT. */
+/* >          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] U */
+/* > \verbatim */
+/* >          U is DOUBLE PRECISION array, dimension (LDU, N) */
+/* >          On entry, an NRU-by-N matrix U. */
+/* >          On exit, U is overwritten by U * Q. */
+/* >          Not referenced if NRU = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= max(1,NRU). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (LDC, NCC) */
+/* >          On entry, an N-by-NCC matrix C. */
+/* >          On exit, C is overwritten by Q**T * C. */
+/* >          Not referenced if NCC = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. */
+/* >          LDC >= max(1,N) if NCC > 0; LDC >= 1 if NCC = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (4*(N-1)) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  If INFO = -i, the i-th argument had an illegal value */
+/* >          > 0: */
+/* >             if NCVT = NRU = NCC = 0, */
+/* >                = 1, a split was marked by a positive value in E */
+/* >                = 2, current block of Z not diagonalized after 30*N */
+/* >                     iterations (in inner while loop) */
+/* >                = 3, termination criterion of outer while loop not met */
+/* >                     (program created more than N unreduced blocks) */
+/* >             else (NCVT, NRU and NCC are not all zero), */
+/* >                   the algorithm did not converge; D and E contain the */
+/* >                   elements of a bidiagonal matrix which is orthogonally */
+/* >                   similar to the input matrix B;  if INFO = i, i */
+/* >                   elements of E have not converged to zero. */
+/* > \endverbatim */
+
+/* > \par Internal Parameters: */
+/*  ========================= */
+/* > */
+/* > \verbatim */
+/* >  TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) */
+/* >          TOLMUL controls the convergence criterion of the QR loop. */
+/* >          If it is positive, TOLMUL*EPS is the desired relative */
+/* >             precision in the computed singular values. */
+/* >          If it is negative, abs(TOLMUL*EPS*sigma_max) is the */
+/* >             desired absolute accuracy in the computed singular */
+/* >             values (corresponds to relative accuracy */
+/* >             abs(TOLMUL*EPS) in the largest singular value). */
+/* >          abs(TOLMUL) should be between 1 and 1/EPS, and preferably */
+/* >             between 10 (for fast convergence) and .1/EPS */
+/* >             (for there to be some accuracy in the results). */
+/* >          Default is to lose either one eighth or 2 of the */
+/* >             available decimal digits in each computed singular value */
+/* >             (whichever is smaller). */
+/* > */
+/* >  MAXITR  INTEGER, default = 6 */
+/* >          MAXITR controls the maximum number of passes of the */
+/* >          algorithm through its inner loop. The algorithm stops */
+/* >          (and so fails to converge) if the number of passes */
+/* >          through the inner loop exceeds MAXITR*N**2. */
+/* > */
+/* > \endverbatim */
+
+/* > \par Note: */
+/*  =========== */
+/* > */
+/* > \verbatim */
+/* >  Bug report from Cezary Dendek. */
+/* >  On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is */
+/* >  removed since it can overflow pretty easily (for N larger or equal */
+/* >  than 18,919). We instead use MAXITDIVN = MAXITR*N. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup auxOTHERcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer *
+	nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, 
+	integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *
+	ldc, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
+	    i__2;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Local variables */
+    doublereal abse;
+    integer idir;
+    doublereal abss;
+    integer oldm;
+    doublereal cosl;
+    integer isub, iter;
+    doublereal unfl, sinl, cosr, smin, smax, sinr;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *);
+    integer iterdivn;
+    extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal 
+	    *, doublereal *, doublereal *);
+    doublereal f, g, h__;
+    integer i__, j, m;
+    doublereal r__;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    doublereal oldcs;
+    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, 
+	    integer *, doublereal *, doublereal *, doublereal *, integer *);
+    integer oldll;
+    doublereal shift, sigmn, oldsn;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    doublereal sminl, sigmx;
+    logical lower;
+    integer maxitdivn;
+    extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *,
+	     doublereal *, integer *), dlasv2_(doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *);
+    doublereal cs;
+    integer ll;
+    extern doublereal dlamch_(char *);
+    doublereal sn, mu;
+    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *), xerbla_(char *, 
+	    integer *, ftnlen);
+    doublereal sminoa, thresh;
+    logical rotate;
+    integer nm1;
+    doublereal tolmul;
+    integer nm12, nm13, lll;
+    doublereal eps, sll, tol;
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lower = lsame_(uplo, "L");
+    if (! lsame_(uplo, "U") && ! lower) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*ncvt < 0) {
+	*info = -3;
+    } else if (*nru < 0) {
+	*info = -4;
+    } else if (*ncc < 0) {
+	*info = -5;
+    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < f2cmax(1,*n)) {
+	*info = -9;
+    } else if (*ldu < f2cmax(1,*nru)) {
+	*info = -11;
+    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < f2cmax(1,*n)) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DBDSQR", &i__1, (ftnlen)6);
+	return 0;
+    }
+    if (*n == 0) {
+	return 0;
+    }
+    if (*n == 1) {
+	goto L160;
+    }
+
+/*     ROTATE is true if any singular vectors desired, false otherwise */
+
+    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
+
+/*     If no singular vectors desired, use qd algorithm */
+
+    if (! rotate) {
+	dlasq1_(n, &d__[1], &e[1], &work[1], info);
+
+/*     If INFO equals 2, dqds didn't finish, try to finish */
+
+	if (*info != 2) {
+	    return 0;
+	}
+	*info = 0;
+    }
+
+    nm1 = *n - 1;
+    nm12 = nm1 + nm1;
+    nm13 = nm12 + nm1;
+    idir = 0;
+
+/*     Get machine constants */
+
+    eps = dlamch_("Epsilon");
+    unfl = dlamch_("Safe minimum");
+
+/*     If matrix lower bidiagonal, rotate to be upper bidiagonal */
+/*     by applying Givens rotations on the left */
+
+    if (lower) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    work[i__] = cs;
+	    work[nm1 + i__] = sn;
+/* L10: */
+	}
+
+/*        Update singular vectors if desired */
+
+	if (*nru > 0) {
+	    dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], 
+		    ldu);
+	}
+	if (*ncc > 0) {
+	    dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset],
+		     ldc);
+	}
+    }
+
+/*     Compute singular values to relative accuracy TOL */
+/*     (By setting TOL to be negative, algorithm will compute */
+/*     singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */
+
+/* Computing MAX */
+/* Computing MIN */
+    d__3 = 100., d__4 = pow_dd(&eps, &c_b15);
+    d__1 = 10., d__2 = f2cmin(d__3,d__4);
+    tolmul = f2cmax(d__1,d__2);
+    tol = tolmul * eps;
+
+/*     Compute approximate maximum, minimum singular values */
+
+    smax = 0.;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
+	smax = f2cmax(d__2,d__3);
+/* L20: */
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
+	smax = f2cmax(d__2,d__3);
+/* L30: */
+    }
+    sminl = 0.;
+    if (tol >= 0.) {
+
+/*        Relative accuracy desired */
+
+	sminoa = abs(d__[1]);
+	if (sminoa == 0.) {
+	    goto L50;
+	}
+	mu = sminoa;
+	i__1 = *n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1]
+		    , abs(d__1))));
+	    sminoa = f2cmin(sminoa,mu);
+	    if (sminoa == 0.) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	sminoa /= sqrt((doublereal) (*n));
+/* Computing MAX */
+	d__1 = tol * sminoa, d__2 = *n * (*n * unfl) * 6;
+	thresh = f2cmax(d__1,d__2);
+    } else {
+
+/*        Absolute accuracy desired */
+
+/* Computing MAX */
+	d__1 = abs(tol) * smax, d__2 = *n * (*n * unfl) * 6;
+	thresh = f2cmax(d__1,d__2);
+    }
+
+/*     Prepare for main iteration loop for the singular values */
+/*     (MAXIT is the maximum number of passes through the inner */
+/*     loop permitted before nonconvergence signalled.) */
+
+    maxitdivn = *n * 6;
+    iterdivn = 0;
+    iter = -1;
+    oldll = -1;
+    oldm = -1;
+
+/*     M points to last element of unconverged part of matrix */
+
+    m = *n;
+
+/*     Begin main iteration loop */
+
+L60:
+
+/*     Check for convergence or exceeding iteration count */
+
+    if (m <= 1) {
+	goto L160;
+    }
+
+    if (iter >= *n) {
+	iter -= *n;
+	++iterdivn;
+	if (iterdivn >= maxitdivn) {
+	    goto L200;
+	}
+    }
+
+/*     Find diagonal block of matrix to work on */
+
+    if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
+	d__[m] = 0.;
+    }
+    smax = (d__1 = d__[m], abs(d__1));
+    smin = smax;
+    i__1 = m - 1;
+    for (lll = 1; lll <= i__1; ++lll) {
+	ll = m - lll;
+	abss = (d__1 = d__[ll], abs(d__1));
+	abse = (d__1 = e[ll], abs(d__1));
+	if (tol < 0. && abss <= thresh) {
+	    d__[ll] = 0.;
+	}
+	if (abse <= thresh) {
+	    goto L80;
+	}
+	smin = f2cmin(smin,abss);
+/* Computing MAX */
+	d__1 = f2cmax(smax,abss);
+	smax = f2cmax(d__1,abse);
+/* L70: */
+    }
+    ll = 0;
+    goto L90;
+L80:
+    e[ll] = 0.;
+
+/*     Matrix splits since E(LL) = 0 */
+
+    if (ll == m - 1) {
+
+/*        Convergence of bottom singular value, return to top of loop */
+
+	--m;
+	goto L60;
+    }
+L90:
+    ++ll;
+
+/*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero */
+
+    if (ll == m - 1) {
+
+/*        2 by 2 block, handle separately */
+
+	dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
+		 &sinl, &cosl);
+	d__[m - 1] = sigmx;
+	e[m - 1] = 0.;
+	d__[m] = sigmn;
+
+/*        Compute singular vectors, if desired */
+
+	if (*ncvt > 0) {
+	    drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &
+		    cosr, &sinr);
+	}
+	if (*nru > 0) {
+	    drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &
+		    c__1, &cosl, &sinl);
+	}
+	if (*ncc > 0) {
+	    drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &
+		    cosl, &sinl);
+	}
+	m += -2;
+	goto L60;
+    }
+
+/*     If working on new submatrix, choose shift direction */
+/*     (from larger end diagonal element towards smaller) */
+
+    if (ll > oldm || m < oldll) {
+	if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {
+
+/*           Chase bulge from top (big end) to bottom (small end) */
+
+	    idir = 1;
+	} else {
+
+/*           Chase bulge from bottom (big end) to top (small end) */
+
+	    idir = 2;
+	}
+    }
+
+/*     Apply convergence tests */
+
+    if (idir == 1) {
+
+/*        Run convergence test in forward direction */
+/*        First apply standard test to bottom of matrix */
+
+	if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(
+		d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) 
+		{
+	    e[m - 1] = 0.;
+	    goto L60;
+	}
+
+	if (tol >= 0.) {
+
+/*           If relative accuracy desired, */
+/*           apply convergence criterion forward */
+
+	    mu = (d__1 = d__[ll], abs(d__1));
+	    sminl = mu;
+	    i__1 = m - 1;
+	    for (lll = ll; lll <= i__1; ++lll) {
+		if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
+		    e[lll] = 0.;
+		    goto L60;
+		}
+		mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[
+			lll], abs(d__1))));
+		sminl = f2cmin(sminl,mu);
+/* L100: */
+	    }
+	}
+
+    } else {
+
+/*        Run convergence test in backward direction */
+/*        First apply standard test to top of matrix */
+
+	if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1)
+		) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) {
+	    e[ll] = 0.;
+	    goto L60;
+	}
+
+	if (tol >= 0.) {
+
+/*           If relative accuracy desired, */
+/*           apply convergence criterion backward */
+
+	    mu = (d__1 = d__[m], abs(d__1));
+	    sminl = mu;
+	    i__1 = ll;
+	    for (lll = m - 1; lll >= i__1; --lll) {
+		if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
+		    e[lll] = 0.;
+		    goto L60;
+		}
+		mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll]
+			, abs(d__1))));
+		sminl = f2cmin(sminl,mu);
+/* L110: */
+	    }
+	}
+    }
+    oldll = ll;
+    oldm = m;
+
+/*     Compute shift.  First, test if shifting would ruin relative */
+/*     accuracy, and if so set the shift to zero. */
+
+/* Computing MAX */
+    d__1 = eps, d__2 = tol * .01;
+    if (tol >= 0. && *n * tol * (sminl / smax) <= f2cmax(d__1,d__2)) {
+
+/*        Use a zero shift to avoid loss of relative accuracy */
+
+	shift = 0.;
+    } else {
+
+/*        Compute the shift from 2-by-2 block at end of matrix */
+
+	if (idir == 1) {
+	    sll = (d__1 = d__[ll], abs(d__1));
+	    dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
+	} else {
+	    sll = (d__1 = d__[m], abs(d__1));
+	    dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
+	}
+
+/*        Test if shift negligible, and if so set to zero */
+
+	if (sll > 0.) {
+/* Computing 2nd power */
+	    d__1 = shift / sll;
+	    if (d__1 * d__1 < eps) {
+		shift = 0.;
+	    }
+	}
+    }
+
+/*     Increment iteration count */
+
+    iter = iter + m - ll;
+
+/*     If SHIFT = 0, do simplified QR iteration */
+
+    if (shift == 0.) {
+	if (idir == 1) {
+
+/*           Chase bulge from top to bottom */
+/*           Save cosines and sines for later singular vector updates */
+
+	    cs = 1.;
+	    oldcs = 1.;
+	    i__1 = m - 1;
+	    for (i__ = ll; i__ <= i__1; ++i__) {
+		d__1 = d__[i__] * cs;
+		dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
+		if (i__ > ll) {
+		    e[i__ - 1] = oldsn * r__;
+		}
+		d__1 = oldcs * r__;
+		d__2 = d__[i__ + 1] * sn;
+		dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
+		work[i__ - ll + 1] = cs;
+		work[i__ - ll + 1 + nm1] = sn;
+		work[i__ - ll + 1 + nm12] = oldcs;
+		work[i__ - ll + 1 + nm13] = oldsn;
+/* L120: */
+	    }
+	    h__ = d__[m] * cs;
+	    d__[m] = h__ * oldcs;
+	    e[m - 1] = h__ * oldsn;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
+			ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
+			+ 1], &u[ll * u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
+			+ 1], &c__[ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
+		e[m - 1] = 0.;
+	    }
+
+	} else {
+
+/*           Chase bulge from bottom to top */
+/*           Save cosines and sines for later singular vector updates */
+
+	    cs = 1.;
+	    oldcs = 1.;
+	    i__1 = ll + 1;
+	    for (i__ = m; i__ >= i__1; --i__) {
+		d__1 = d__[i__] * cs;
+		dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
+		if (i__ < m) {
+		    e[i__] = oldsn * r__;
+		}
+		d__1 = oldcs * r__;
+		d__2 = d__[i__ - 1] * sn;
+		dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
+		work[i__ - ll] = cs;
+		work[i__ - ll + nm1] = -sn;
+		work[i__ - ll + nm12] = oldcs;
+		work[i__ - ll + nm13] = -oldsn;
+/* L130: */
+	    }
+	    h__ = d__[ll] * cs;
+	    d__[ll] = h__ * oldcs;
+	    e[ll] = h__ * oldsn;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
+			nm13 + 1], &vt[ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
+			 u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
+			ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((d__1 = e[ll], abs(d__1)) <= thresh) {
+		e[ll] = 0.;
+	    }
+	}
+    } else {
+
+/*        Use nonzero shift */
+
+	if (idir == 1) {
+
+/*           Chase bulge from top to bottom */
+/*           Save cosines and sines for later singular vector updates */
+
+	    f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[
+		    ll]) + shift / d__[ll]);
+	    g = e[ll];
+	    i__1 = m - 1;
+	    for (i__ = ll; i__ <= i__1; ++i__) {
+		dlartg_(&f, &g, &cosr, &sinr, &r__);
+		if (i__ > ll) {
+		    e[i__ - 1] = r__;
+		}
+		f = cosr * d__[i__] + sinr * e[i__];
+		e[i__] = cosr * e[i__] - sinr * d__[i__];
+		g = sinr * d__[i__ + 1];
+		d__[i__ + 1] = cosr * d__[i__ + 1];
+		dlartg_(&f, &g, &cosl, &sinl, &r__);
+		d__[i__] = r__;
+		f = cosl * e[i__] + sinl * d__[i__ + 1];
+		d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
+		if (i__ < m - 1) {
+		    g = sinl * e[i__ + 1];
+		    e[i__ + 1] = cosl * e[i__ + 1];
+		}
+		work[i__ - ll + 1] = cosr;
+		work[i__ - ll + 1 + nm1] = sinr;
+		work[i__ - ll + 1 + nm12] = cosl;
+		work[i__ - ll + 1 + nm13] = sinl;
+/* L140: */
+	    }
+	    e[m - 1] = f;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
+			ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
+			+ 1], &u[ll * u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
+			+ 1], &c__[ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
+		e[m - 1] = 0.;
+	    }
+
+	} else {
+
+/*           Chase bulge from bottom to top */
+/*           Save cosines and sines for later singular vector updates */
+
+	    f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m]
+		    ) + shift / d__[m]);
+	    g = e[m - 1];
+	    i__1 = ll + 1;
+	    for (i__ = m; i__ >= i__1; --i__) {
+		dlartg_(&f, &g, &cosr, &sinr, &r__);
+		if (i__ < m) {
+		    e[i__] = r__;
+		}
+		f = cosr * d__[i__] + sinr * e[i__ - 1];
+		e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
+		g = sinr * d__[i__ - 1];
+		d__[i__ - 1] = cosr * d__[i__ - 1];
+		dlartg_(&f, &g, &cosl, &sinl, &r__);
+		d__[i__] = r__;
+		f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
+		d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
+		if (i__ > ll + 1) {
+		    g = sinl * e[i__ - 2];
+		    e[i__ - 2] = cosl * e[i__ - 2];
+		}
+		work[i__ - ll] = cosr;
+		work[i__ - ll + nm1] = -sinr;
+		work[i__ - ll + nm12] = cosl;
+		work[i__ - ll + nm13] = -sinl;
+/* L150: */
+	    }
+	    e[ll] = f;
+
+/*           Test convergence */
+
+	    if ((d__1 = e[ll], abs(d__1)) <= thresh) {
+		e[ll] = 0.;
+	    }
+
+/*           Update singular vectors if desired */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
+			nm13 + 1], &vt[ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
+			 u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
+			ll + c_dim1], ldc);
+	    }
+	}
+    }
+
+/*     QR iteration finished, go back and check convergence */
+
+    goto L60;
+
+/*     All singular values converged, so make them positive */
+
+L160:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (d__[i__] < 0.) {
+	    d__[i__] = -d__[i__];
+
+/*           Change sign of singular vectors, if desired */
+
+	    if (*ncvt > 0) {
+		dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt);
+	    }
+	}
+/* L170: */
+    }
+
+/*     Sort the singular values into decreasing order (insertion sort on */
+/*     singular values, but only one transposition per singular vector) */
+
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Scan for smallest D(I) */
+
+	isub = 1;
+	smin = d__[1];
+	i__2 = *n + 1 - i__;
+	for (j = 2; j <= i__2; ++j) {
+	    if (d__[j] <= smin) {
+		isub = j;
+		smin = d__[j];
+	    }
+/* L180: */
+	}
+	if (isub != *n + 1 - i__) {
+
+/*           Swap singular values and vectors */
+
+	    d__[isub] = d__[*n + 1 - i__];
+	    d__[*n + 1 - i__] = smin;
+	    if (*ncvt > 0) {
+		dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + 
+			vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * 
+			u_dim1 + 1], &c__1);
+	    }
+	    if (*ncc > 0) {
+		dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + 
+			c_dim1], ldc);
+	    }
+	}
+/* L190: */
+    }
+    goto L220;
+
+/*     Maximum number of iterations exceeded, failure to converge */
+
+L200:
+    *info = 0;
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (e[i__] != 0.) {
+	    ++(*info);
+	}
+/* L210: */
+    }
+L220:
+    return 0;
+
+/*     End of DBDSQR */
+
+} /* dbdsqr_ */
+
diff --git a/lapack-netlib/SRC/dbdsvdx.c b/lapack-netlib/SRC/dbdsvdx.c
new file mode 100644
index 000000000..4d2f51eae
--- /dev/null
+++ b/lapack-netlib/SRC/dbdsvdx.c
@@ -0,0 +1,1349 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* C type behind Fortran INTEGER arguments */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;	/* C type behind Fortran DOUBLE PRECISION arguments */
+typedef struct { real r, i; } complex;	/* single-precision pair: real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision pair: real part r, imaginary part i */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only; assumes {r,i} is laid out like _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast only; assumes {r,i} is laid out like _Complex double */
+#define pCf(z) (*_pCf(z))	/* assignable _Complex float view of *z */
+#define pCd(z) (*_pCd(z))	/* assignable _Complex double view of *z */
+typedef int logical;	/* truth value; see TRUE_ / FALSE_ below */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it expands to plain extern unless defined beforehand */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: plain-int aliases and parameter records, one record type per I/O statement kind */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the length fields that accompany char* members */
+typedef int ftnint;
+
+/* cilist: external read, write (the unit is an ftnint) */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write (the unit is a char*) */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; most char* result fields are paired with a length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): the only length field typed ftnint, not ftnlen; both are int here */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points; one member per scalar type declared above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; -- left out: this header defines no longint type */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;	/* presumably the number of entries in vars -- confirm against users */
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* type-generic; takes over from <stdlib.h> abs() from here on; x may be evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* yields a on ties; an argument may be evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* yields a on ties; an argument may be evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)	/* value (0 or 1) of bit b of a */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))	/* a with bit b cleared */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))	/* a with bit b set */
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* evaluated with double-precision ccos, narrowed on assignment */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf: R points to a {r,i} struct, so assigning a _Complex float to *(R) would not type-check */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* double-precision conj, narrowed on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* double-precision cimag applied to a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* log10 as log10(e) * ln(x) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* log10 as log10(e) * ln(x) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* round to nearest, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| if b >= 0, else -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* the length is simply the n argument; s is ignored */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))	/* double base, double exponent; both passed by pointer */
+#define pow_si(B,E) spow_ui(*(B),*(E))	/* float base, integer exponent */
+#define pow_ri(B,E) spow_ui(*(B),*(E))	/* float base, integer exponent */
+#define pow_di(B,E) dpow_ui(*(B),*(E))	/* double base, integer exponent */
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}	/* doublecomplex base, integer exponent, result stored in *p */
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}	/* complex base, integer exponent, result stored in *p */
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* B is a doublecomplex* like A: convert it with Cd, a bare *(B) is a struct */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }	/* concatenate *np pieces into lpp, truncating at llp chars and blank-filling the remainder */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* strncmp over the shorter of the two lengths */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars, stops at a NUL in B; the rest of A is left untouched */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored; exits with status 1 */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored; exits with status 0 */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* statement macros: only meaningful inside a loop body */
+#define mycycle_() continue;
+#define myceiling_(w) ceil(w)
+#define myhuge_(w) HUGE_VAL	/* the argument is ignored */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)	/* always the double version; s and e are passed by pointer */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* pointer to a function returning logical, any arguments */
+#else
+typedef logical (*L_fp)();	/* pointer to a function returning logical, parameters unspecified */
+#endif
+
+/* x**n for float x and integer n by binary exponentiation; n < 0 yields (1/x)**|n|, n == 0 yields 1. */
+static float spow_ui(float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows when n == INT_MIN */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float result = 1.0;	/* not named pow, so pow() from <math.h> stays visible */
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;	/* current bit set: fold this power of x in */
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* x**n for double x and integer n by binary exponentiation; n < 0 yields (1/x)**|n|, n == 0 yields 1. */
+static double dpow_ui(double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows when n == INT_MIN */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double result = 1.0;	/* not named pow, so pow() from <math.h> stays visible */
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;	/* current bit set: fold this power of x in */
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* x**n for complex float x and integer n by binary exponentiation; n < 0 yields (1/x)**|n|, n == 0 yields 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows when n == INT_MIN */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float result = 1.0;	/* not named pow, so pow() from <math.h> stays visible */
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;	/* current bit set: fold this power of x in */
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* x**n for complex double x and integer n by binary exponentiation; n < 0 yields (1/x)**|n|, n == 0 yields 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows when n == INT_MIN */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex double result = 1.0;	/* not named pow, so pow() from <math.h> stays visible */
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) result *= x;	/* current bit set: fold this power of x in */
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Integer power x**n. For n > 0 binary exponentiation is used; n <= 0 is resolved by the cases below. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* other x give 0 for negative n; x == 0 still evaluates 1/x (integer division by zero), as before */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* x == -1: only the parity of n matters, so n is never negated (that overflows for INT_MIN) */
+		return (n % 2 != 0) ? -1 : 1;
+	}
+	for (u = n; ; ) {
+		if (u & 01) result *= x;	/* current bit set: fold this power of x in */
+		if (u >>= 1) x *= x;	/* square only while higher bits remain */
+		else break;
+	}
+	return result;
+}
+/* Position (1-based, counted from s) of the first largest value among w[s-1] .. w[e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	double top = w[s-1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }	/* strict >, so ties keep the earlier index */
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest value among w[s-1] .. w[e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	float top = w[s-1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }	/* strict >, so ties keep the earlier index */
+	return best - s + 1;
+}
+/* *z = sum over i < n of conj(x_i) * y_i, single precision, with element strides *incx_ and *incy_. */
+/* A negative stride walks its vector from the far end, following the reference BLAS indexing. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += conjf(Cf(&x[i])) * Cf(&y[i]);
+	} else {
+		/* start offsets: plain x[i*incx] would index before the array when incx < 0 */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over i < n of conj(x_i) * y_i, double precision, with element strides *incx_ and *incy_. */
+/* A negative stride walks its vector from the far end, following the reference BLAS indexing. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += conj(Cd(&x[i])) * Cd(&y[i]);
+	} else {
+		/* start offsets: plain x[i*incx] would index before the array when incx < 0 */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+/* *z = sum over i < n of x_i * y_i (no conjugation), single precision, strides *incx_ and *incy_. */
+/* A negative stride walks its vector from the far end, following the reference BLAS indexing. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += Cf(&x[i]) * Cf(&y[i]);
+	} else {
+		/* start offsets: plain x[i*incx] would index before the array when incx < 0 */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over i < n of x_i * y_i (no conjugation), double precision, strides *incx_ and *incy_. */
+/* A negative stride walks its vector from the far end, following the reference BLAS indexing. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += Cd(&x[i]) * Cd(&y[i]);
+	} else {
+		/* start offsets: plain x[i*incx] would index before the array when incx < 0 */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static doublereal c_b10 = 1.;	/* magnitude handed to d_sign to obtain +1 or -1 */
+static doublereal c_b14 = -.125;	/* exponent handed to pow_dd with eps when forming tol */
+static integer c__1 = 1;	/* increment argument for idamax_ */
+static doublereal c_b19 = 0.;
+static integer c__2 = 2;
+
+/* > \brief \b DBDSVDX */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DBDSVDX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbdsvdx.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbdsvdx.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbdsvdx.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*     SUBROUTINE DBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, */
+/*    $                    NS, S, Z, LDZ, WORK, IWORK, INFO ) */
+
+/*      CHARACTER          JOBZ, RANGE, UPLO */
+/*      INTEGER            IL, INFO, IU, LDZ, N, NS */
+/*      DOUBLE PRECISION   VL, VU */
+/*      INTEGER            IWORK( * ) */
+/*      DOUBLE PRECISION   D( * ), E( * ), S( * ), WORK( * ), */
+/*                         Z( LDZ, * ) */
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  DBDSVDX computes the singular value decomposition (SVD) of a real */
+/* >  N-by-N (upper or lower) bidiagonal matrix B, B = U * S * VT, */
+/* >  where S is a diagonal matrix with non-negative diagonal elements */
+/* >  (the singular values of B), and U and VT are orthogonal matrices */
+/* >  of left and right singular vectors, respectively. */
+/* > */
+/* >  Given an upper bidiagonal B with diagonal D = [ d_1 d_2 ... d_N ] */
+/* >  and superdiagonal E = [ e_1 e_2 ... e_N-1 ], DBDSVDX computes the */
+/* >  singular value decomposition of B through the eigenvalues and */
+/* >  eigenvectors of the N*2-by-N*2 tridiagonal matrix */
+/* > */
+/* >        |  0  d_1                | */
+/* >        | d_1  0  e_1            | */
+/* >  TGK = |     e_1  0  d_2        | */
+/* >        |         d_2  .   .     | */
+/* >        |              .   .   . | */
+/* > */
+/* >  If (s,u,v) is a singular triplet of B with ||u|| = ||v|| = 1, then */
+/* >  (+/-s,q), ||q|| = 1, are eigenpairs of TGK, with q = P * ( u' +/-v' ) / */
+/* >  sqrt(2) = ( v_1 u_1 v_2 u_2 ... v_n u_n ) / sqrt(2), and */
+/* >  P = [ e_{n+1} e_{1} e_{n+2} e_{2} ... ]. */
+/* > */
+/* >  Given a TGK matrix, one can either a) compute -s,-v and change signs */
+/* >  so that the singular values (and corresponding vectors) are already in */
+/* >  descending order (as in DGESVD/DGESDD) or b) compute s,v and reorder */
+/* >  the values (and corresponding vectors). DBDSVDX implements a) by */
+/* >  calling DSTEVX (bisection plus inverse iteration), to be replaced */
+/* >  with a version of the Multiple Relative Robust Representation */
+/* >  algorithm. (See P. Willems and B. Lang, A framework for the MR^3 */
+/* >  algorithm: theory and implementation, SIAM J. Sci. Comput., */
+/* >  35:740-766, 2013.) */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] UPLO */
+/* > \verbatim */
+/* >          UPLO is CHARACTER*1 */
+/* >          = 'U':  B is upper bidiagonal; */
+/* >          = 'L':  B is lower bidiagonal. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBZ */
+/* > \verbatim */
+/* >          JOBZ is CHARACTER*1 */
+/* >          = 'N':  Compute singular values only; */
+/* >          = 'V':  Compute singular values and singular vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RANGE */
+/* > \verbatim */
+/* >          RANGE is CHARACTER*1 */
+/* >          = 'A': all singular values will be found. */
+/* >          = 'V': all singular values in the half-open interval [VL,VU) */
+/* >                 will be found. */
+/* >          = 'I': the IL-th through IU-th singular values will be found. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the bidiagonal matrix.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] D */
+/* > \verbatim */
+/* >          D is DOUBLE PRECISION array, dimension (N) */
+/* >          The n diagonal elements of the bidiagonal matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] E */
+/* > \verbatim */
+/* >          E is DOUBLE PRECISION array, dimension (max(1,N-1)) */
+/* >          The (n-1) superdiagonal elements of the bidiagonal matrix */
+/* >          B in elements 1 to N-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] VL */
+/* > \verbatim */
+/* >         VL is DOUBLE PRECISION */
+/* >          If RANGE='V', the lower bound of the interval to */
+/* >          be searched for singular values. VU > VL. */
+/* >          Not referenced if RANGE = 'A' or 'I'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] VU */
+/* > \verbatim */
+/* >         VU is DOUBLE PRECISION */
+/* >          If RANGE='V', the upper bound of the interval to */
+/* >          be searched for singular values. VU > VL. */
+/* >          Not referenced if RANGE = 'A' or 'I'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IL */
+/* > \verbatim */
+/* >          IL is INTEGER */
+/* >          If RANGE='I', the index of the */
+/* >          smallest singular value to be returned. */
+/* >          1 <= IL <= IU <= N, if N > 0. */
+/* >          Not referenced if RANGE = 'A' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IU */
+/* > \verbatim */
+/* >          IU is INTEGER */
+/* >          If RANGE='I', the index of the */
+/* >          largest singular value to be returned. */
+/* >          1 <= IL <= IU <= N, if N > 0. */
+/* >          Not referenced if RANGE = 'A' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] NS */
+/* > \verbatim */
+/* >          NS is INTEGER */
+/* >          The total number of singular values found.  0 <= NS <= N. */
+/* >          If RANGE = 'A', NS = N, and if RANGE = 'I', NS = IU-IL+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is DOUBLE PRECISION array, dimension (N) */
+/* >          The first NS elements contain the selected singular values in */
+/* >          ascending order. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] Z */
+/* > \verbatim */
+/* >          Z is DOUBLE PRECISION array, dimension (2*N,K) */
+/* >          If JOBZ = 'V', then if INFO = 0 the first NS columns of Z */
+/* >          contain the singular vectors of the matrix B corresponding to */
+/* >          the selected singular values, with U in rows 1 to N and V */
+/* >          in rows N+1 to N*2, i.e. */
+/* >          Z = [ U ] */
+/* >              [ V ] */
+/* >          If JOBZ = 'N', then Z is not referenced. */
+/* >          Note: The user must ensure that at least K = NS+1 columns are */
+/* >          supplied in the array Z; if RANGE = 'V', the exact value of */
+/* >          NS is not known in advance and an upper bound must be used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDZ */
+/* > \verbatim */
+/* >          LDZ is INTEGER */
+/* >          The leading dimension of the array Z. LDZ >= 1, and if */
+/* >          JOBZ = 'V', LDZ >= max(2,N*2). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (14*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (12*N) */
+/* >          If JOBZ = 'V', then if INFO = 0, the first NS elements of */
+/* >          IWORK are zero. If INFO > 0, then IWORK contains the indices */
+/* >          of the eigenvectors that failed to converge in DSTEVX. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, then i eigenvectors failed to converge */
+/* >                   in DSTEVX. The indices of the eigenvectors */
+/* >                   (as returned by DSTEVX) are stored in the */
+/* >                   array IWORK. */
+/* >                if INFO = N*2 + 1, an internal error occurred. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleOTHEReigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int dbdsvdx_(char *uplo, char *jobz, char *range, integer *n,
+	 doublereal *d__, doublereal *e, doublereal *vl, doublereal *vu, 
+	integer *il, integer *iu, integer *ns, doublereal *s, doublereal *z__,
+	 integer *ldz, doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Local variables */
+    doublereal emin;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer ntgk;
+    doublereal smin, smax, nrmu, nrmv;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    logical sveq0;
+    integer i__, idbeg, j, k;
+    doublereal sqrt2;
+    integer idend;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    integer isbeg;
+    extern logical lsame_(char *, char *);
+    integer idtgk, ietgk, iltgk, itemp;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer icolz;
+    logical allsv;
+    integer idptr;
+    logical indsv;
+    integer ieptr, iutgk;
+    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *);
+    doublereal vltgk;
+    logical lower;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    doublereal zjtji;
+    logical split, valsv;
+    integer isplt;
+    doublereal ortol, vutgk;
+    logical wantz;
+    char rngvx[1];
+    integer irowu, irowv, irowz;
+    extern doublereal dlamch_(char *);
+    integer iifail;
+    doublereal mu;
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlaset_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    doublereal abstol, thresh;
+    integer iiwork;
+    extern /* Subroutine */ int dstevx_(char *, char *, integer *, doublereal 
+	    *, doublereal *, doublereal *, doublereal *, integer *, integer *,
+	     doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *, integer *), 
+	    mecago_();
+    doublereal eps;
+    integer nsl;
+    doublereal tol, ulp;
+    integer nru, nrv;
+
+
+/*  -- LAPACK driver routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    --s;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1 * 1;
+    z__ -= z_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    allsv = lsame_(range, "A");
+    valsv = lsame_(range, "V");
+    indsv = lsame_(range, "I");
+    wantz = lsame_(jobz, "V");
+    lower = lsame_(uplo, "L");
+
+    *info = 0;
+    if (! lsame_(uplo, "U") && ! lower) {
+	*info = -1;
+    } else if (! (wantz || lsame_(jobz, "N"))) {
+	*info = -2;
+    } else if (! (allsv || valsv || indsv)) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*n > 0) {
+	if (valsv) {
+	    if (*vl < 0.) {
+		*info = -7;
+	    } else if (*vu <= *vl) {
+		*info = -8;
+	    }
+	} else if (indsv) {
+	    if (*il < 1 || *il > f2cmax(1,*n)) {
+		*info = -9;
+	    } else if (*iu < f2cmin(*n,*il) || *iu > *n) {
+		*info = -10;
+	    }
+	}
+    }
+    if (*info == 0) {
+	if (*ldz < 1 || wantz && *ldz < *n << 1) {
+	    *info = -14;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DBDSVDX", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible (N.LE.1) */
+
+    *ns = 0;
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	if (allsv || indsv) {
+	    *ns = 1;
+	    s[1] = abs(d__[1]);
+	} else {
+	    if (*vl < abs(d__[1]) && *vu >= abs(d__[1])) {
+		*ns = 1;
+		s[1] = abs(d__[1]);
+	    }
+	}
+	if (wantz) {
+	    z__[z_dim1 + 1] = d_sign(&c_b10, &d__[1]);
+	    z__[z_dim1 + 2] = 1.;
+	}
+	return 0;
+    }
+
+    abstol = dlamch_("Safe Minimum") * 2;
+    ulp = dlamch_("Precision");
+    eps = dlamch_("Epsilon");
+    sqrt2 = sqrt(2.);
+    ortol = sqrt(ulp);
+
+/*     Criterion for splitting is taken from DBDSQR when singular */
+/*     values are computed to relative accuracy TOL. (See J. Demmel and */
+/*     W. Kahan, Accurate singular values of bidiagonal matrices, SIAM */
+/*     J. Sci. and Stat. Comput., 11:873–912, 1990.) */
+
+/* Computing MAX */
+/* Computing MIN */
+    d__3 = 100., d__4 = pow_dd(&eps, &c_b14);
+    d__1 = 10., d__2 = f2cmin(d__3,d__4);
+    tol = f2cmax(d__1,d__2) * eps;
+
+/*     Compute approximate maximum, minimum singular values. */
+
+    i__ = idamax_(n, &d__[1], &c__1);
+    smax = (d__1 = d__[i__], abs(d__1));
+    i__1 = *n - 1;
+    i__ = idamax_(&i__1, &e[1], &c__1);
+/* Computing MAX */
+    d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
+    smax = f2cmax(d__2,d__3);
+
+/*     Compute threshold for neglecting D's and E's. */
+
+    smin = abs(d__[1]);
+    if (smin != 0.) {
+	mu = smin;
+	i__1 = *n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1]
+		    , abs(d__1))));
+	    smin = f2cmin(smin,mu);
+	    if (smin == 0.) {
+		myexit_();
+	    }
+	}
+    }
+    smin /= sqrt((doublereal) (*n));
+    thresh = tol * smin;
+
+/*     Check for zeros in D and E (splits), i.e. submatrices. */
+
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = d__[i__], abs(d__1)) <= thresh) {
+	    d__[i__] = 0.;
+	}
+	if ((d__1 = e[i__], abs(d__1)) <= thresh) {
+	    e[i__] = 0.;
+	}
+    }
+    if ((d__1 = d__[*n], abs(d__1)) <= thresh) {
+	d__[*n] = 0.;
+    }
+
+/*     Pointers for arrays used by DSTEVX. */
+
+    idtgk = 1;
+    ietgk = idtgk + (*n << 1);
+    itemp = ietgk + (*n << 1);
+    iifail = 1;
+    iiwork = iifail + (*n << 1);
+
+/*     Set RNGVX, which corresponds to RANGE for DSTEVX in TGK mode. */
+/*     VL,VU or IL,IU are redefined to conform to implementation a) */
+/*     described in the leading comments. */
+
+    iltgk = 0;
+    iutgk = 0;
+    vltgk = 0.;
+    vutgk = 0.;
+
+    if (allsv) {
+
+/*        All singular values will be found. We aim at -s (see */
+/*        leading comments) with RNGVX = 'I'. IL and IU are set */
+/*        later (as ILTGK and IUTGK) according to the dimension */
+/*        of the active submatrix. */
+
+	*(unsigned char *)rngvx = 'I';
+	if (wantz) {
+	    i__1 = *n << 1;
+	    i__2 = *n + 1;
+	    dlaset_("F", &i__1, &i__2, &c_b19, &c_b19, &z__[z_offset], ldz);
+	}
+    } else if (valsv) {
+
+/*        Find singular values in a half-open interval. We aim */
+/*        at -s (see leading comments) and we swap VL and VU */
+/*        (as VUTGK and VLTGK), changing their signs. */
+
+	*(unsigned char *)rngvx = 'V';
+	vltgk = -(*vu);
+	vutgk = -(*vl);
+	i__1 = idtgk + (*n << 1) - 1;
+	for (i__ = idtgk; i__ <= i__1; ++i__) {
+	    work[i__] = 0.;
+	}
+/*         WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */
+	dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2);
+	i__1 = *n - 1;
+	dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2);
+	i__1 = *n << 1;
+	dstevx_("N", "V", &i__1, &work[idtgk], &work[ietgk], &vltgk, &vutgk, &
+		iltgk, &iltgk, &abstol, ns, &s[1], &z__[z_offset], ldz, &work[
+		itemp], &iwork[iiwork], &iwork[iifail], info);
+	if (*ns == 0) {
+	    return 0;
+	} else {
+	    if (wantz) {
+		i__1 = *n << 1;
+		dlaset_("F", &i__1, ns, &c_b19, &c_b19, &z__[z_offset], ldz);
+	    }
+	}
+    } else if (indsv) {
+
+/*        Find the IL-th through the IU-th singular values. We aim */
+/*        at -s (see leading comments) and indices are mapped into */
+/*        values, therefore mimicking DSTEBZ, where */
+
+/*        GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN */
+/*        GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN */
+
+	iltgk = *il;
+	iutgk = *iu;
+	*(unsigned char *)rngvx = 'V';
+	i__1 = idtgk + (*n << 1) - 1;
+	for (i__ = idtgk; i__ <= i__1; ++i__) {
+	    work[i__] = 0.;
+	}
+/*         WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */
+	dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2);
+	i__1 = *n - 1;
+	dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2);
+	i__1 = *n << 1;
+	dstevx_("N", "I", &i__1, &work[idtgk], &work[ietgk], &vltgk, &vltgk, &
+		iltgk, &iltgk, &abstol, ns, &s[1], &z__[z_offset], ldz, &work[
+		itemp], &iwork[iiwork], &iwork[iifail], info);
+	vltgk = s[1] - smax * 2. * ulp * *n;
+	i__1 = idtgk + (*n << 1) - 1;
+	for (i__ = idtgk; i__ <= i__1; ++i__) {
+	    work[i__] = 0.;
+	}
+/*         WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */
+	dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2);
+	i__1 = *n - 1;
+	dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2);
+	i__1 = *n << 1;
+	dstevx_("N", "I", &i__1, &work[idtgk], &work[ietgk], &vutgk, &vutgk, &
+		iutgk, &iutgk, &abstol, ns, &s[1], &z__[z_offset], ldz, &work[
+		itemp], &iwork[iiwork], &iwork[iifail], info);
+	vutgk = s[1] + smax * 2. * ulp * *n;
+	vutgk = f2cmin(vutgk,0.);
+
+/*        If VLTGK=VUTGK, DSTEVX returns an error message, */
+/*        so if needed we change VLTGK slightly. */
+
+	if (vltgk == vutgk) {
+	    vltgk -= tol;
+	}
+
+	if (wantz) {
+	    i__1 = *n << 1;
+	    i__2 = *iu - *il + 1;
+	    dlaset_("F", &i__1, &i__2, &c_b19, &c_b19, &z__[z_offset], ldz);
+	}
+    }
+
+/*     Initialize variables and pointers for S, Z, and WORK. */
+
+/*     NRU, NRV: number of rows in U and V for the active submatrix */
+/*     IDBEG, ISBEG: offsets for the entries of D and S */
+/*     IROWZ, ICOLZ: offsets for the rows and columns of Z */
+/*     IROWU, IROWV: offsets for the rows of U and V */
+
+    *ns = 0;
+    nru = 0;
+    nrv = 0;
+    idbeg = 1;
+    isbeg = 1;
+    irowz = 1;
+    icolz = 1;
+    irowu = 2;
+    irowv = 1;
+    split = FALSE_;
+    sveq0 = FALSE_;
+
+/*     Form the tridiagonal TGK matrix. */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	s[i__] = 0.;
+    }
+/*      S( 1:N ) = ZERO */
+    work[ietgk + (*n << 1) - 1] = 0.;
+    i__1 = idtgk + (*n << 1) - 1;
+    for (i__ = idtgk; i__ <= i__1; ++i__) {
+	work[i__] = 0.;
+    }
+/*      WORK( IDTGK:IDTGK+2*N-1 ) = ZERO */
+    dcopy_(n, &d__[1], &c__1, &work[ietgk], &c__2);
+    i__1 = *n - 1;
+    dcopy_(&i__1, &e[1], &c__1, &work[ietgk + 1], &c__2);
+
+
+/*     Check for splits in two levels, outer level */
+/*     in E and inner level in D. */
+
+    i__1 = *n << 1;
+    for (ieptr = 2; ieptr <= i__1; ieptr += 2) {
+	if (work[ietgk + ieptr - 1] == 0.) {
+
+/*           Split in E (this piece of B is square) or bottom */
+/*           of the (input bidiagonal) matrix. */
+
+	    isplt = idbeg;
+	    idend = ieptr - 1;
+	    i__2 = idend;
+	    for (idptr = idbeg; idptr <= i__2; idptr += 2) {
+		if (work[ietgk + idptr - 1] == 0.) {
+
+/*                 Split in D (rectangular submatrix). Set the number */
+/*                 of rows in U and V (NRU and NRV) accordingly. */
+
+		    if (idptr == idbeg) {
+
+/*                    D=0 at the top. */
+
+			sveq0 = TRUE_;
+			if (idbeg == idend) {
+			    nru = 1;
+			    nrv = 1;
+			}
+		    } else if (idptr == idend) {
+
+/*                    D=0 at the bottom. */
+
+			sveq0 = TRUE_;
+			nru = (idend - isplt) / 2 + 1;
+			nrv = nru;
+			if (isplt != idbeg) {
+			    ++nru;
+			}
+		    } else {
+			if (isplt == idbeg) {
+
+/*                       Split: top rectangular submatrix. */
+
+			    nru = (idptr - idbeg) / 2;
+			    nrv = nru + 1;
+			} else {
+
+/*                       Split: middle square submatrix. */
+
+			    nru = (idptr - isplt) / 2 + 1;
+			    nrv = nru;
+			}
+		    }
+		} else if (idptr == idend) {
+
+/*                 Last entry of D in the active submatrix. */
+
+		    if (isplt == idbeg) {
+
+/*                    No split (trivial case). */
+
+			nru = (idend - idbeg) / 2 + 1;
+			nrv = nru;
+		    } else {
+
+/*                    Split: bottom rectangular submatrix. */
+
+			nrv = (idend - isplt) / 2 + 1;
+			nru = nrv + 1;
+		    }
+		}
+
+		ntgk = nru + nrv;
+
+		if (ntgk > 0) {
+
+/*                 Compute eigenvalues/vectors of the active */
+/*                 submatrix according to RANGE: */
+/*                 if RANGE='A' (ALLSV) then RNGVX = 'I' */
+/*                 if RANGE='V' (VALSV) then RNGVX = 'V' */
+/*                 if RANGE='I' (INDSV) then RNGVX = 'V' */
+
+		    iltgk = 1;
+		    iutgk = ntgk / 2;
+		    if (allsv || vutgk == 0.) {
+			if (sveq0 || smin < eps || ntgk % 2 > 0) {
+/*                        Special case: eigenvalue equal to zero or very */
+/*                        small, additional eigenvector is needed. */
+			    ++iutgk;
+			}
+		    }
+
+/*                 Workspace needed by DSTEVX: */
+/*                 WORK( ITEMP: ): 2*5*NTGK */
+/*                 IWORK( 1: ): 2*6*NTGK */
+
+		    dstevx_(jobz, rngvx, &ntgk, &work[idtgk + isplt - 1], &
+			    work[ietgk + isplt - 1], &vltgk, &vutgk, &iltgk, &
+			    iutgk, &abstol, &nsl, &s[isbeg], &z__[irowz + 
+			    icolz * z_dim1], ldz, &work[itemp], &iwork[iiwork]
+			    , &iwork[iifail], info);
+		    if (*info != 0) {
+/*                    Exit with the error code from DSTEVX. */
+			return 0;
+		    }
+		    emin = (d__1 = s[isbeg], abs(d__1));
+		    i__3 = isbeg + nsl - 1;
+		    for (i__ = isbeg; i__ <= i__3; ++i__) {
+			if ((d__1 = s[i__], abs(d__1)) > emin) {
+			    emin = s[i__];
+			}
+		    }
+/*                  EMIN = ABS( MAXVAL( S( ISBEG:ISBEG+NSL-1 ) ) ) */
+
+		    if (nsl > 0 && wantz) {
+
+/*                    Normalize u=Z([2,4,...],:) and v=Z([1,3,...],:), */
+/*                    changing the sign of v as discussed in the leading */
+/*                    comments. The norms of u and v may be (slightly) */
+/*                    different from 1/sqrt(2) if the corresponding */
+/*                    eigenvalues are very small or too close. We check */
+/*                    those norms and, if needed, reorthogonalize the */
+/*                    vectors. */
+
+			if (nsl > 1 && vutgk == 0. && ntgk % 2 == 0 && emin ==
+				 0. && ! split) {
+
+/*                       D=0 at the top or bottom of the active submatrix: */
+/*                       one eigenvalue is equal to zero; concatenate the */
+/*                       eigenvectors corresponding to the two smallest */
+/*                       eigenvalues. */
+
+			    i__3 = irowz + ntgk - 1;
+			    for (i__ = irowz; i__ <= i__3; ++i__) {
+				z__[i__ + (icolz + nsl - 2) * z_dim1] += z__[
+					i__ + (icolz + nsl - 1) * z_dim1];
+				z__[i__ + (icolz + nsl - 1) * z_dim1] = 0.;
+			    }
+/*                        Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) = */
+/*     $                  Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) + */
+/*     $                  Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) */
+/*                        Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) = */
+/*     $                  ZERO */
+/*                       IF( IUTGK*2.GT.NTGK ) THEN */
+/*                          Eigenvalue equal to zero or very small. */
+/*                          NSL = NSL - 1 */
+/*                       END IF */
+			}
+
+/* Computing MIN */
+			i__4 = nsl - 1, i__5 = nru - 1;
+			i__3 = f2cmin(i__4,i__5);
+			for (i__ = 0; i__ <= i__3; ++i__) {
+			    nrmu = dnrm2_(&nru, &z__[irowu + (icolz + i__) * 
+				    z_dim1], &c__2);
+			    if (nrmu == 0.) {
+				*info = (*n << 1) + 1;
+				return 0;
+			    }
+			    d__1 = 1. / nrmu;
+			    dscal_(&nru, &d__1, &z__[irowu + (icolz + i__) * 
+				    z_dim1], &c__2);
+			    if (nrmu != 1. && (d__1 = nrmu - ortol, abs(d__1))
+				     * sqrt2 > 1.) {
+				i__4 = i__ - 1;
+				for (j = 0; j <= i__4; ++j) {
+				    zjtji = -ddot_(&nru, &z__[irowu + (icolz 
+					    + j) * z_dim1], &c__2, &z__[irowu 
+					    + (icolz + i__) * z_dim1], &c__2);
+				    daxpy_(&nru, &zjtji, &z__[irowu + (icolz 
+					    + j) * z_dim1], &c__2, &z__[irowu 
+					    + (icolz + i__) * z_dim1], &c__2);
+				}
+				nrmu = dnrm2_(&nru, &z__[irowu + (icolz + i__)
+					 * z_dim1], &c__2);
+				d__1 = 1. / nrmu;
+				dscal_(&nru, &d__1, &z__[irowu + (icolz + i__)
+					 * z_dim1], &c__2);
+			    }
+			}
+/* Computing MIN */
+			i__4 = nsl - 1, i__5 = nrv - 1;
+			i__3 = f2cmin(i__4,i__5);
+			for (i__ = 0; i__ <= i__3; ++i__) {
+			    nrmv = dnrm2_(&nrv, &z__[irowv + (icolz + i__) * 
+				    z_dim1], &c__2);
+			    if (nrmv == 0.) {
+				*info = (*n << 1) + 1;
+				return 0;
+			    }
+			    d__1 = -1. / nrmv;
+			    dscal_(&nrv, &d__1, &z__[irowv + (icolz + i__) * 
+				    z_dim1], &c__2);
+			    if (nrmv != 1. && (d__1 = nrmv - ortol, abs(d__1))
+				     * sqrt2 > 1.) {
+				i__4 = i__ - 1;
+				for (j = 0; j <= i__4; ++j) {
+				    zjtji = -ddot_(&nrv, &z__[irowv + (icolz 
+					    + j) * z_dim1], &c__2, &z__[irowv 
+					    + (icolz + i__) * z_dim1], &c__2);
+				    daxpy_(&nru, &zjtji, &z__[irowv + (icolz 
+					    + j) * z_dim1], &c__2, &z__[irowv 
+					    + (icolz + i__) * z_dim1], &c__2);
+				}
+				nrmv = dnrm2_(&nrv, &z__[irowv + (icolz + i__)
+					 * z_dim1], &c__2);
+				d__1 = 1. / nrmv;
+				dscal_(&nrv, &d__1, &z__[irowv + (icolz + i__)
+					 * z_dim1], &c__2);
+			    }
+			}
+			if (vutgk == 0. && idptr < idend && ntgk % 2 > 0) {
+
+/*                       D=0 in the middle of the active submatrix (one */
+/*                       eigenvalue is equal to zero): save the corresponding */
+/*                       eigenvector for later use (when bottom of the */
+/*                       active submatrix is reached). */
+
+			    split = TRUE_;
+			    i__3 = irowz + ntgk - 1;
+			    for (i__ = irowz; i__ <= i__3; ++i__) {
+				z__[i__ + (*n + 1) * z_dim1] = z__[i__ + (*ns 
+					+ nsl) * z_dim1];
+				z__[i__ + (*ns + nsl) * z_dim1] = 0.;
+			    }
+/*                        Z( IROWZ:IROWZ+NTGK-1,N+1 ) = */
+/*     $                     Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) */
+/*                        Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) = */
+/*     $                     ZERO */
+			}
+		    }
+
+/* ** WANTZ **! */
+		    nsl = f2cmin(nsl,nru);
+		    sveq0 = FALSE_;
+
+/*                 Absolute values of the eigenvalues of TGK. */
+
+		    i__3 = nsl - 1;
+		    for (i__ = 0; i__ <= i__3; ++i__) {
+			s[isbeg + i__] = (d__1 = s[isbeg + i__], abs(d__1));
+		    }
+
+/*                 Update pointers for TGK, S and Z. */
+
+		    isbeg += nsl;
+		    irowz += ntgk;
+		    icolz += nsl;
+		    irowu = irowz;
+		    irowv = irowz + 1;
+		    isplt = idptr + 1;
+		    *ns += nsl;
+		    nru = 0;
+		    nrv = 0;
+		}
+/* ** NTGK.GT.0 **! */
+		if (irowz < *n << 1 && wantz) {
+		    i__3 = irowz - 1;
+		    for (i__ = 1; i__ <= i__3; ++i__) {
+			z__[i__ + icolz * z_dim1] = 0.;
+		    }
+/*                       Z( 1:IROWZ-1, ICOLZ ) = ZERO */
+		}
+	    }
+/* ** IDPTR loop **! */
+	    if (split && wantz) {
+
+/*              Bring back eigenvector corresponding */
+/*              to eigenvalue equal to zero. */
+
+		i__2 = idend - ntgk + 1;
+		for (i__ = idbeg; i__ <= i__2; ++i__) {
+		    z__[i__ + (isbeg - 1) * z_dim1] += z__[i__ + (*n + 1) * 
+			    z_dim1];
+		    z__[i__ + (*n + 1) * z_dim1] = 0.;
+		}
+/*               Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) = */
+/*     $         Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) + */
+/*     $         Z( IDBEG:IDEND-NTGK+1,N+1 ) */
+/*               Z( IDBEG:IDEND-NTGK+1,N+1 ) = 0 */
+	    }
+	    --irowv;
+	    ++irowu;
+	    idbeg = ieptr + 1;
+	    sveq0 = FALSE_;
+	    split = FALSE_;
+	}
+/* ** Check for split in E **! */
+    }
+
+/*     Sort the singular values into decreasing order (insertion sort on */
+/*     singular values, but only one transposition per singular vector) */
+
+/* ** IEPTR loop **! */
+    i__1 = *ns - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	k = 1;
+	smin = s[1];
+	i__2 = *ns + 1 - i__;
+	for (j = 2; j <= i__2; ++j) {
+	    if (s[j] <= smin) {
+		k = j;
+		smin = s[j];
+	    }
+	}
+	if (k != *ns + 1 - i__) {
+	    s[k] = s[*ns + 1 - i__];
+	    s[*ns + 1 - i__] = smin;
+	    if (wantz) {
+		i__2 = *n << 1;
+		dswap_(&i__2, &z__[k * z_dim1 + 1], &c__1, &z__[(*ns + 1 - 
+			i__) * z_dim1 + 1], &c__1);
+	    }
+	}
+    }
+
+/*     If RANGE=I, check for singular values/vectors to be discarded. */
+
+    if (indsv) {
+	k = *iu - *il + 1;
+	if (k < *ns) {
+	    i__1 = *ns;
+	    for (i__ = k + 1; i__ <= i__1; ++i__) {
+		s[i__] = 0.;
+	    }
+/*            S( K+1:NS ) = ZERO */
+	    if (wantz) {
+		i__1 = *n << 1;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = *ns;
+		    for (j = k + 1; j <= i__2; ++j) {
+			z__[i__ + j * z_dim1] = 0.;
+		    }
+		}
+/*           Z( 1:N*2,K+1:NS ) = ZERO */
+	    }
+	    *ns = k;
+	}
+    }
+
+/*     Reorder Z: U = Z( 1:N,1:NS ), V = Z( N+1:N*2,1:NS ). */
+/*     If B is a lower diagonal, swap U and V. */
+
+    if (wantz) {
+	i__1 = *ns;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = *n << 1;
+	    dcopy_(&i__2, &z__[i__ * z_dim1 + 1], &c__1, &work[1], &c__1);
+	    if (lower) {
+		dcopy_(n, &work[2], &c__2, &z__[*n + 1 + i__ * z_dim1], &c__1)
+			;
+		dcopy_(n, &work[1], &c__2, &z__[i__ * z_dim1 + 1], &c__1);
+	    } else {
+		dcopy_(n, &work[2], &c__2, &z__[i__ * z_dim1 + 1], &c__1);
+		dcopy_(n, &work[1], &c__2, &z__[*n + 1 + i__ * z_dim1], &c__1)
+			;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DBDSVDX */
+
+} /* dbdsvdx_ */
+
diff --git a/lapack-netlib/SRC/dcombssq.c b/lapack-netlib/SRC/dcombssq.c
new file mode 100644
index 000000000..0990d084c
--- /dev/null
+++ b/lapack-netlib/SRC/dcombssq.c
@@ -0,0 +1,486 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {union {float p[2]; _Complex float c;} u; u.p[0] = z->r; u.p[1] = z->i; return u.c;} /* exact (re,im) copy; r + i*_Complex_I makes the real part NaN when i is Inf/NaN */
+static inline _Complex double Cd(doublecomplex *z) {union {double p[2]; _Complex double c;} u; u.p[0] = z->r; u.p[1] = z->i; return u.c;} /* exact (re,im) copy; r + i*_Complex_I makes the real part NaN when i is Inf/NaN */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)(void*)z;} /* same address, retyped as a pointer to _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)(void*)z;} /* same address, retyped as a pointer to _Complex double */
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* B is converted with Cd() like A; dereferencing it would hand a doublecomplex struct to cpow */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {	/* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	float acc = 1.0; unsigned long int bits;
+	if(n == 0) return acc;
+	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;	/* |n| in unsigned arithmetic: a signed -n overflows for the most negative integer */
+	if(n < 0) x = 1/x;
+	for( ; ; ) {
+		if(bits & 01) acc *= x;
+		if(bits >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+static double dpow_ui(double x, integer n) {	/* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	double acc = 1.0; unsigned long int bits;
+	if(n == 0) return acc;
+	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;	/* |n| in unsigned arithmetic: a signed -n overflows for the most negative integer */
+	if(n < 0) x = 1/x;
+	for( ; ; ) {
+		if(bits & 01) acc *= x;
+		if(bits >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {	/* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex float acc = 1.0; unsigned long int bits;
+	if(n == 0) return acc;
+	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;	/* |n| in unsigned arithmetic: a signed -n overflows for the most negative integer */
+	if(n < 0) x = 1/x;
+	for( ; ; ) {
+		if(bits & 01) acc *= x;
+		if(bits >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {	/* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex double acc = 1.0; unsigned long int bits;
+	if(n == 0) return acc;
+	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;	/* |n| in unsigned arithmetic: a signed -n overflows for the most negative integer */
+	if(n < 0) x = 1/x;
+	for( ; ; ) {
+		if(bits & 01) acc *= x;
+		if(bits >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {	/* x**n evaluated entirely in integer arithmetic */
+	integer acc = 1; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* (-1)**n depends only on the parity of n; testing it directly avoids negating n, which overflows for the most negative integer */
+		if (x == -1) return (n % 2 == 0) ? 1 : -1;
+		/* any other nonzero x with n < 0 yields 0; for x == 0 the original 1/x (integer division by zero) is kept as is */
+		return x == 0 ? 1/x : 0;
+	}
+	/* n > 0: binary exponentiation over the bits of n */
+	for(bits = n; ; ) {
+		if(bits & 01) acc *= x;
+		if(bits >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* position (counted from 1 at index s) of the first largest entry among w[s-1..e-1]; n is not used */
+	integer at = s, k;	/* at: 1-based index of the running maximum */
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[at-1]) at = k;
+	return at - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* position (counted from 1 at index s) of the first largest entry among w[s-1..e-1]; n is not used */
+	integer at = s, k;	/* at: 1-based index of the running maximum */
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[at-1]) at = k;
+	return at - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z := sum over k of conj(x_k) * y_k, stepping through x and y by *incx_ and *incy_; zero when *n_ <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks the vector backwards, so start at its far end */
+	/* (reference BLAS convention) instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z := sum over k of conj(x_k) * y_k, stepping through x and y by *incx_ and *incy_; zero when *n_ <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks the vector backwards, so start at its far end */
+	/* (reference BLAS convention) instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z := sum over k of x_k * y_k (no conjugation), stepping through x and y by *incx_ and *incy_; zero when *n_ <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks the vector backwards, so start at its far end */
+	/* (reference BLAS convention) instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z := sum over k of x_k * y_k (no conjugation), stepping through x and y by *incx_ and *incy_; zero when *n_ <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks the vector backwards, so start at its far end */
+	/* (reference BLAS convention) instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DCOMBSSQ adds two scaled sum of squares quantities. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DCOMBSSQ( V1, V2 ) */
+
+/*       DOUBLE PRECISION   V1( 2 ), V2( 2 ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DCOMBSSQ adds two scaled sum of squares quantities, V1 := V1 + V2. */
+/* > That is, */
+/* > */
+/* >    V1_scale**2 * V1_sumsq := V1_scale**2 * V1_sumsq */
+/* >                            + V2_scale**2 * V2_sumsq */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in,out] V1 */
+/* > \verbatim */
+/* >          V1 is DOUBLE PRECISION array, dimension (2). */
+/* >          The first scaled sum. */
+/* >          V1(1) = V1_scale, V1(2) = V1_sumsq. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V2 */
+/* > \verbatim */
+/* >          V2 is DOUBLE PRECISION array, dimension (2). */
+/* >          The second scaled sum. */
+/* >          V2(1) = V2_scale, V2(2) = V2_sumsq. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2018 */
+
+/* > \ingroup OTHERauxiliary */
+
+/*  ===================================================================== */
+/* Subroutine */ int dcombssq_(doublereal *v1, doublereal *v2)
+{
+    /* Quotient of the two scales, squared below to rescale a sum. */
+    doublereal ratio;
+
+
+/*  -- LAPACK auxiliary routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2018 */
+
+
+/* ===================================================================== */
+
+/*     v1 and v2 are indexed from 0 here: element 0 is the scale and */
+/*     element 1 is the sum of squares (V(1) and V(2) in the header). */
+
+    if (!(v1[0] >= v2[0])) {
+
+/*        V1 scale is not >= V2 scale: rescale V1's sum to V2's scale, */
+/*        add V2's sum, and adopt V2's scale. */
+
+	ratio = v1[0] / v2[0];
+	v1[1] = v2[1] + ratio * ratio * v1[1];
+	v1[0] = v2[0];
+	return 0;
+    }
+
+    if (v1[0] == 0.) {
+/*        Zero scale: there is nothing to divide by, add the sums as is. */
+	v1[1] += v2[1];
+	return 0;
+    }
+
+/*     V1 keeps its scale: bring V2's sum to that scale and accumulate. */
+    ratio = v2[0] / v1[0];
+    v1[1] += ratio * ratio * v2[1];
+    return 0;
+} /* dcombssq_ */
+
diff --git a/lapack-netlib/SRC/ddisna.c b/lapack-netlib/SRC/ddisna.c
new file mode 100644
index 000000000..e34d9a03c
--- /dev/null
+++ b/lapack-netlib/SRC/ddisna.c
@@ -0,0 +1,651 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* builds a C99 complex value from the f2c struct fields r and i */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* double precision counterpart of Cf */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only, no copy; relies on {r, i} being laid out like _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* double precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z))	/* lvalue view of a complex struct as _Complex float, used as an assignment target */
+#define pCd(z) (*_pCd(z))	/* lvalue view of a doublecomplex struct as _Complex double */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {	/* complex x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {	/* complex x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {	/* integer power x**n, result truncated to an integer */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;	/* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0;	/* |x| > 1 gives 0; NOTE(review): x == 0 evaluates 1/0, an integer division by zero */
+		else n = -n;	/* x == -1: only the parity of |n| matters, the loop below computes it */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {	/* second clause is true only for x == -1 with n != 0 */
+		u = n;
+		for(pow = 1; ; ) {	/* square-and-multiply over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest element among w[s-1..e-1]; n is not used */
+	double top = w[s-1]; integer j, best = s;
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > top) { top = w[j-1]; best = j; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest element among w[s-1..e-1]; n is not used */
+	float top = w[s-1]; integer j, best = s;
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > top) { top = w[j-1]; best = j; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum(conjg(x(i)) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum(conjg(x(i)) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores the unconjugated sum(x(i) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores the unconjugated sum(x(i) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DDISNA */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DDISNA + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ddisna.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ddisna.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ddisna.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DDISNA( JOB, M, N, D, SEP, INFO ) */
+
+/*       CHARACTER          JOB */
+/*       INTEGER            INFO, M, N */
+/*       DOUBLE PRECISION   D( * ), SEP( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DDISNA computes the reciprocal condition numbers for the eigenvectors */
+/* > of a real symmetric or complex Hermitian matrix or for the left or */
+/* > right singular vectors of a general m-by-n matrix. The reciprocal */
+/* > condition number is the 'gap' between the corresponding eigenvalue or */
+/* > singular value and the nearest other one. */
+/* > */
+/* > The bound on the error, measured by angle in radians, in the I-th */
+/* > computed vector is given by */
+/* > */
+/* >        DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) */
+/* > */
+/* > where ANORM = 2-norm(A) = max( abs( D(j) ) ).  SEP(I) is not allowed */
+/* > to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of */
+/* > the error bound. */
+/* > */
+/* > DDISNA may also be used to compute error bounds for eigenvectors of */
+/* > the generalized symmetric definite eigenproblem. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          Specifies for which problem the reciprocal condition numbers */
+/* >          should be computed: */
+/* >          = 'E':  the eigenvectors of a symmetric/Hermitian matrix; */
+/* >          = 'L':  the left singular vectors of a general matrix; */
+/* >          = 'R':  the right singular vectors of a general matrix. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          If JOB = 'L' or 'R', the number of columns of the matrix, */
+/* >          in which case N >= 0. Ignored if JOB = 'E'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] D */
+/* > \verbatim */
+/* >          D is DOUBLE PRECISION array, dimension (M) if JOB = 'E' */
+/* >                              dimension (min(M,N)) if JOB = 'L' or 'R' */
+/* >          The eigenvalues (if JOB = 'E') or singular values (if JOB = */
+/* >          'L' or 'R') of the matrix, in either increasing or decreasing */
+/* >          order. If singular values, they must be non-negative. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SEP */
+/* > \verbatim */
+/* >          SEP is DOUBLE PRECISION array, dimension (M) if JOB = 'E' */
+/* >                               dimension (min(M,N)) if JOB = 'L' or 'R' */
+/* >          The reciprocal condition numbers of the vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup auxOTHERcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int ddisna_(char *job, integer *m, integer *n, doublereal *
+	d__, doublereal *sep, integer *info)
+{
+    /* External routines */
+    extern logical lsame_(char *, char *);
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+    /* Local variables */
+    logical want_eig, want_left, want_right, want_sv;
+    logical ascending, descending;
+    integer cnt, j, argno;
+    doublereal gap_prev, gap_next, diff, lhs, rhs;
+    doublereal eps, safmin, anorm, thresh;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+/*     d__ and sep are indexed from 0 here, so Fortran D(i) is d__[i - 1]. */
+
+/*     Decode JOB and derive the number of values, cnt, held in d__ */
+
+    *info = 0;
+    want_eig = lsame_(job, "E");
+    want_left = lsame_(job, "L");
+    want_right = lsame_(job, "R");
+    want_sv = want_left || want_right;
+    if (want_eig) {
+	cnt = *m;
+    } else if (want_sv) {
+	cnt = f2cmin(*m,*n);
+    }
+
+/*     Test the input arguments */
+
+    if (! (want_eig || want_sv)) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (cnt < 0) {
+	*info = -3;
+    } else {
+
+/*        The values must be monotone; stop testing a direction once it */
+/*        has been ruled out */
+
+	ascending = TRUE_;
+	descending = TRUE_;
+	for (j = 0; j < cnt - 1; ++j) {
+	    if (ascending) {
+		ascending = d__[j] <= d__[j + 1];
+	    }
+	    if (descending) {
+		descending = d__[j] >= d__[j + 1];
+	    }
+	}
+
+/*        Singular values must in addition be non-negative; checking the */
+/*        smallest one of a monotone sequence is enough */
+
+	if (want_sv && cnt > 0) {
+	    if (ascending) {
+		ascending = 0. <= d__[0];
+	    }
+	    if (descending) {
+		descending = d__[cnt - 1] >= 0.;
+	    }
+	}
+	if (! ascending && ! descending) {
+	    *info = -4;
+	}
+    }
+    if (*info != 0) {
+	argno = -(*info);
+	xerbla_("DDISNA", &argno, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (cnt == 0) {
+	return 0;
+    }
+
+/*     Compute reciprocal condition numbers: each one is the distance */
+/*     from a value to its nearest neighbour in the sorted sequence */
+
+    if (cnt == 1) {
+	sep[0] = dlamch_("O");
+    } else {
+	diff = d__[1] - d__[0];
+	gap_prev = abs(diff);
+	sep[0] = gap_prev;
+	for (j = 1; j < cnt - 1; ++j) {
+	    diff = d__[j + 1] - d__[j];
+	    gap_next = abs(diff);
+	    sep[j] = f2cmin(gap_prev,gap_next);
+	    gap_prev = gap_next;
+	}
+	sep[cnt - 1] = gap_prev;
+    }
+
+/*     For left vectors with M > N, or right vectors with M < N, the */
+/*     separation of the smallest singular value is capped by that value */
+
+    if (want_sv && ((want_left && *m > *n) || (want_right && *m < *n))) {
+	if (ascending) {
+	    lhs = sep[0], rhs = d__[0];
+	    sep[0] = f2cmin(lhs,rhs);
+	}
+	if (descending) {
+	    lhs = sep[cnt - 1], rhs = d__[cnt - 1];
+	    sep[cnt - 1] = f2cmin(lhs,rhs);
+	}
+    }
+
+/*     Ensure that reciprocal condition numbers are not less than */
+/*     threshold, in order to limit the size of the error bound */
+
+    eps = dlamch_("E");
+    safmin = dlamch_("S");
+    lhs = abs(d__[0]), rhs = abs(d__[cnt - 1]);
+    anorm = f2cmax(lhs,rhs);
+    if (anorm == 0.) {
+	thresh = eps;
+    } else {
+	lhs = eps * anorm;
+	thresh = f2cmax(lhs,safmin);
+    }
+    for (j = 0; j < cnt; ++j) {
+	if (! (sep[j] >= thresh)) {
+	    sep[j] = thresh;
+	}
+    }
+
+    return 0;
+
+/*     End of DDISNA */
+
+} /* ddisna_ */
+
diff --git a/lapack-netlib/SRC/dgbbrd.c b/lapack-netlib/SRC/dgbbrd.c
new file mode 100644
index 000000000..b54d35d11
--- /dev/null
+++ b/lapack-netlib/SRC/dgbbrd.c
@@ -0,0 +1,1033 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* builds a C99 complex value from the f2c struct fields r and i */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* double precision counterpart of Cf */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only, no copy; relies on {r, i} being laid out like _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* double precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z))	/* lvalue view of a complex struct as _Complex float, used as an assignment target */
+#define pCd(z) (*_pCd(z))	/* lvalue view of a doublecomplex struct as _Complex double */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {	/* complex x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {	/* complex x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x;	/* negative exponent: invert the base, then use |n| */
+		for(u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| formed unsigned: -n overflows for the most negative n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {	/* integer power x**n, result truncated to an integer */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;	/* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0;	/* |x| > 1 gives 0; NOTE(review): x == 0 evaluates 1/0, an integer division by zero */
+		else n = -n;	/* x == -1: only the parity of |n| matters, the loop below computes it */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {	/* second clause is true only for x == -1 with n != 0 */
+		u = n;
+		for(pow = 1; ; ) {	/* square-and-multiply over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest element among w[s-1..e-1]; n is not used */
+	double top = w[s-1]; integer j, best = s;
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > top) { top = w[j-1]; best = j; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest element among w[s-1..e-1]; n is not used */
+	float top = w[s-1]; integer j, best = s;
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > top) { top = w[j-1]; best = j; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum(conjg(x(i)) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum(conjg(x(i)) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores the unconjugated sum(x(i) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores the unconjugated sum(x(i) * y(i), i = 1..n) in *z; x and y are strided by incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative increment traverses the vector backwards, */
+	/* starting (1-n)*inc elements past the pointer, so that no index goes below zero. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i=0;i<n;i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static doublereal c_b8 = 0.;
+static doublereal c_b9 = 1.;
+static integer c__1 = 1;
+
+/* > \brief \b DGBBRD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBBRD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbbrd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbbrd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbbrd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, */
+/*                          LDQ, PT, LDPT, C, LDC, WORK, INFO ) */
+
+/*       CHARACTER          VECT */
+/*       INTEGER            INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC */
+/*       DOUBLE PRECISION   AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), */
+/*      $                   PT( LDPT, * ), Q( LDQ, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBBRD reduces a real general m-by-n band matrix A to upper */
+/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
+/* > */
+/* > The routine computes B, and optionally forms Q or P**T, or computes */
+/* > Q**T*C for a given matrix C. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] VECT */
+/* > \verbatim */
+/* >          VECT is CHARACTER*1 */
+/* >          Specifies whether or not the matrices Q and P**T are to be */
+/* >          formed. */
+/* >          = 'N': do not form Q or P**T; */
+/* >          = 'Q': form Q only; */
+/* >          = 'P': form P**T only; */
+/* >          = 'B': form both. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NCC */
+/* > \verbatim */
+/* >          NCC is INTEGER */
+/* >          The number of columns of the matrix C.  NCC >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals of the matrix A. KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals of the matrix A. KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          On entry, the m-by-n band matrix A, stored in rows 1 to */
+/* >          KL+KU+1. The j-th column of A is stored in the j-th column of */
+/* >          the array AB as follows: */
+/* >          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */
+/* >          On exit, A is overwritten by values generated during the */
+/* >          reduction. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB. LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] D */
+/* > \verbatim */
+/* >          D is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The diagonal elements of the bidiagonal matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] E */
+/* > \verbatim */
+/* >          E is DOUBLE PRECISION array, dimension (min(M,N)-1) */
+/* >          The superdiagonal elements of the bidiagonal matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] Q */
+/* > \verbatim */
+/* >          Q is DOUBLE PRECISION array, dimension (LDQ,M) */
+/* >          If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. */
+/* >          If VECT = 'N' or 'P', the array Q is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDQ */
+/* > \verbatim */
+/* >          LDQ is INTEGER */
+/* >          The leading dimension of the array Q. */
+/* >          LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] PT */
+/* > \verbatim */
+/* >          PT is DOUBLE PRECISION array, dimension (LDPT,N) */
+/* >          If VECT = 'P' or 'B', the n-by-n orthogonal matrix P**T. */
+/* >          If VECT = 'N' or 'Q', the array PT is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDPT */
+/* > \verbatim */
+/* >          LDPT is INTEGER */
+/* >          The leading dimension of the array PT. */
+/* >          LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (LDC,NCC) */
+/* >          On entry, an m-by-ncc matrix C. */
+/* >          On exit, C is overwritten by Q**T*C. */
+/* >          C is not referenced if NCC = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. */
+/* >          LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (2*max(M,N)) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbbrd_(char *vect, integer *m, integer *n, integer *ncc,
+	 integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *
+	d__, doublereal *e, doublereal *q, integer *ldq, doublereal *pt, 
+	integer *ldpt, doublereal *c__, integer *ldc, doublereal *work, 
+	integer *info)
+{
+    /* Reduces the band matrix held in AB to upper bidiagonal form (diagonal in D, superdiagonal in E), optionally accumulating Q, P**T and Q**T*C.  System generated locals: */
+    integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, 
+	    q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
+
+    /* Local variables */
+    integer inca;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *);
+    integer i__, j, l;
+    extern logical lsame_(char *, char *);
+    logical wantb, wantc;
+    integer minmn;
+    logical wantq;
+    integer j1, j2, kb;
+    doublereal ra, rb;
+    integer kk;
+    doublereal rc;
+    integer ml, mn, nr, mu;
+    doublereal rs;
+    extern /* Subroutine */ int dlaset_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *), 
+	    dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, 
+	    doublereal *), xerbla_(char *, integer *, ftnlen), dlargv_(
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *), dlartv_(integer *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *);
+    integer kb1, ml0;
+    logical wantpt;
+    integer mu0, klm, kun, nrt, klu1;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments: every array pointer is shifted so the body can use Fortran 1-based subscripts, e.g. ab[i + j * ab_dim1] is AB(i,j) and d__[i] is D(i) */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    --d__;
+    --e;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1 * 1;
+    q -= q_offset;
+    pt_dim1 = *ldpt;
+    pt_offset = 1 + pt_dim1 * 1;
+    pt -= pt_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    wantb = lsame_(vect, "B");
+    wantq = lsame_(vect, "Q") || wantb;
+    wantpt = lsame_(vect, "P") || wantb;
+    wantc = *ncc > 0;
+    klu1 = *kl + *ku + 1;
+    *info = 0;
+    if (! wantq && ! wantpt && ! lsame_(vect, "N")) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ncc < 0) {
+	*info = -4;
+    } else if (*kl < 0) {
+	*info = -5;
+    } else if (*ku < 0) {
+	*info = -6;
+    } else if (*ldab < klu1) {
+	*info = -8;
+    } else if (*ldq < 1 || wantq && *ldq < f2cmax(1,*m)) {
+	*info = -12;
+    } else if (*ldpt < 1 || wantpt && *ldpt < f2cmax(1,*n)) {
+	*info = -14;
+    } else if (*ldc < 1 || wantc && *ldc < f2cmax(1,*m)) {
+	*info = -16;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGBBRD", &i__1, (ftnlen)6);
+	return 0; /* the failure is reported through *info and xerbla_; the C return value is 0 on every path */
+    }
+
+/*     Initialize Q and P**T to the unit matrix, if needed */
+/*     (NOTE(review): c_b8 / c_b9 are defined outside this function; presumably 0. and 1. - confirm) */
+    if (wantq) {
+	dlaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq);
+    }
+    if (wantpt) {
+	dlaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt);
+    }
+
+/*     Quick return if possible. */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+/*     MINMN = min(M,N) bounds the main reduction loop and the copies into D and E below */
+    minmn = f2cmin(*m,*n);
+
+    if (*kl + *ku > 1) {
+
+/*        Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce */
+/*        first to lower bidiagonal form and then transform to upper */
+/*        bidiagonal */
+
+	if (*ku > 0) {
+	    ml0 = 1;
+	    mu0 = 2;
+	} else {
+	    ml0 = 2;
+	    mu0 = 1;
+	}
+
+/*        Wherever possible, plane rotations are generated and applied in */
+/*        vector operations of length NR over the index set J1:J2:KLU1. */
+
+/*        The sines of the plane rotations are stored in WORK(1:max(m,n)) */
+/*        and the cosines in WORK(max(m,n)+1:2*max(m,n)). */
+
+	mn = f2cmax(*m,*n);
+/* Computing MIN */
+	i__1 = *m - 1;
+	klm = f2cmin(i__1,*kl);
+/* Computing MIN */
+	i__1 = *n - 1;
+	kun = f2cmin(i__1,*ku);
+	kb = klm + kun;
+	kb1 = kb + 1;
+	inca = kb1 * *ldab; /* element stride in AB between entries that are kb1 columns apart */
+	nr = 0;
+	j1 = klm + 2;
+	j2 = 1 - kun;
+
+	i__1 = minmn;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Reduce i-th column and i-th row of matrix to bidiagonal form */
+
+	    ml = klm + 1;
+	    mu = kun + 1;
+	    i__2 = kb;
+	    for (kk = 1; kk <= i__2; ++kk) {
+		j1 += kb;
+		j2 += kb;
+
+/*              generate plane rotations to annihilate nonzero elements */
+/*              which have been created below the band */
+
+		if (nr > 0) {
+		    dlargv_(&nr, &ab[klu1 + (j1 - klm - 1) * ab_dim1], &inca, 
+			    &work[j1], &kb1, &work[mn + j1], &kb1);
+		}
+
+/*              apply plane rotations from the left */
+
+		i__3 = kb;
+		for (l = 1; l <= i__3; ++l) {
+		    if (j2 - klm + l - 1 > *n) {
+			nrt = nr - 1;
+		    } else {
+			nrt = nr;
+		    }
+		    if (nrt > 0) {
+			dlartv_(&nrt, &ab[klu1 - l + (j1 - klm + l - 1) * 
+				ab_dim1], &inca, &ab[klu1 - l + 1 + (j1 - klm 
+				+ l - 1) * ab_dim1], &inca, &work[mn + j1], &
+				work[j1], &kb1);
+		    }
+/* L10: */
+		}
+
+		if (ml > ml0) {
+		    if (ml <= *m - i__ + 1) {
+
+/*                    generate plane rotation to annihilate a(i+ml-1,i) */
+/*                    within the band, and apply rotation from the left */
+
+			dlartg_(&ab[*ku + ml - 1 + i__ * ab_dim1], &ab[*ku + 
+				ml + i__ * ab_dim1], &work[mn + i__ + ml - 1],
+				 &work[i__ + ml - 1], &ra);
+			ab[*ku + ml - 1 + i__ * ab_dim1] = ra;
+			if (i__ < *n) {
+/* Computing MIN */
+			    i__4 = *ku + ml - 2, i__5 = *n - i__;
+			    i__3 = f2cmin(i__4,i__5);
+			    i__6 = *ldab - 1;
+			    i__7 = *ldab - 1;
+			    drot_(&i__3, &ab[*ku + ml - 2 + (i__ + 1) * 
+				    ab_dim1], &i__6, &ab[*ku + ml - 1 + (i__ 
+				    + 1) * ab_dim1], &i__7, &work[mn + i__ + 
+				    ml - 1], &work[i__ + ml - 1]);
+			}
+		    }
+		    ++nr;
+		    j1 -= kb1;
+		}
+
+		if (wantq) {
+
+/*                 accumulate product of plane rotations in Q */
+/*                 (f2c DO-loop form: the sign of the stride i__4 selects which end test is used) */
+		    i__3 = j2;
+		    i__4 = kb1;
+		    for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) 
+			    {
+			drot_(m, &q[(j - 1) * q_dim1 + 1], &c__1, &q[j * 
+				q_dim1 + 1], &c__1, &work[mn + j], &work[j]);
+/* L20: */
+		    }
+		}
+
+		if (wantc) {
+
+/*                 apply plane rotations to C */
+
+		    i__4 = j2;
+		    i__3 = kb1;
+		    for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) 
+			    {
+			drot_(ncc, &c__[j - 1 + c_dim1], ldc, &c__[j + c_dim1]
+				, ldc, &work[mn + j], &work[j]);
+/* L30: */
+		    }
+		}
+
+		if (j2 + kun > *n) {
+
+/*                 adjust J2 to keep within the bounds of the matrix */
+
+		    --nr;
+		    j2 -= kb1;
+		}
+
+		i__3 = j2;
+		i__4 = kb1;
+		for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {
+
+/*                 create nonzero element a(j-1,j+ku) above the band */
+/*                 and store it in WORK(j+kun) */
+
+		    work[j + kun] = work[j] * ab[(j + kun) * ab_dim1 + 1];
+		    ab[(j + kun) * ab_dim1 + 1] = work[mn + j] * ab[(j + kun) 
+			    * ab_dim1 + 1];
+/* L40: */
+		}
+
+/*              generate plane rotations to annihilate nonzero elements */
+/*              which have been generated above the band */
+
+		if (nr > 0) {
+		    dlargv_(&nr, &ab[(j1 + kun - 1) * ab_dim1 + 1], &inca, &
+			    work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1);
+		}
+
+/*              apply plane rotations from the right */
+
+		i__4 = kb;
+		for (l = 1; l <= i__4; ++l) {
+		    if (j2 + l - 1 > *m) {
+			nrt = nr - 1;
+		    } else {
+			nrt = nr;
+		    }
+		    if (nrt > 0) {
+			dlartv_(&nrt, &ab[l + 1 + (j1 + kun - 1) * ab_dim1], &
+				inca, &ab[l + (j1 + kun) * ab_dim1], &inca, &
+				work[mn + j1 + kun], &work[j1 + kun], &kb1);
+		    }
+/* L50: */
+		}
+
+		if (ml == ml0 && mu > mu0) {
+		    if (mu <= *n - i__ + 1) {
+
+/*                    generate plane rotation to annihilate a(i,i+mu-1) */
+/*                    within the band, and apply rotation from the right */
+
+			dlartg_(&ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1], 
+				&ab[*ku - mu + 2 + (i__ + mu - 1) * ab_dim1], 
+				&work[mn + i__ + mu - 1], &work[i__ + mu - 1],
+				 &ra);
+			ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1] = ra;
+/* Computing MIN */
+			i__3 = *kl + mu - 2, i__5 = *m - i__;
+			i__4 = f2cmin(i__3,i__5);
+			drot_(&i__4, &ab[*ku - mu + 4 + (i__ + mu - 2) * 
+				ab_dim1], &c__1, &ab[*ku - mu + 3 + (i__ + mu 
+				- 1) * ab_dim1], &c__1, &work[mn + i__ + mu - 
+				1], &work[i__ + mu - 1]);
+		    }
+		    ++nr;
+		    j1 -= kb1;
+		}
+
+		if (wantpt) {
+
+/*                 accumulate product of plane rotations in P**T */
+
+		    i__4 = j2;
+		    i__3 = kb1;
+		    for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) 
+			    {
+			drot_(n, &pt[j + kun - 1 + pt_dim1], ldpt, &pt[j + 
+				kun + pt_dim1], ldpt, &work[mn + j + kun], &
+				work[j + kun]);
+/* L60: */
+		    }
+		}
+
+		if (j2 + kb > *m) {
+
+/*                 adjust J2 to keep within the bounds of the matrix */
+
+		    --nr;
+		    j2 -= kb1;
+		}
+
+		i__3 = j2;
+		i__4 = kb1;
+		for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {
+
+/*                 create nonzero element a(j+kl+ku,j+ku-1) below the */
+/*                 band and store it in WORK(j+kb) */
+
+		    work[j + kb] = work[j + kun] * ab[klu1 + (j + kun) * 
+			    ab_dim1];
+		    ab[klu1 + (j + kun) * ab_dim1] = work[mn + j + kun] * ab[
+			    klu1 + (j + kun) * ab_dim1];
+/* L70: */
+		}
+
+		if (ml > ml0) {
+		    --ml;
+		} else {
+		    --mu;
+		}
+/* L80: */
+	    }
+/* L90: */
+	}
+    }
+
+    if (*ku == 0 && *kl > 0) {
+
+/*        A has been reduced to lower bidiagonal form */
+
+/*        Transform lower bidiagonal form to upper bidiagonal by applying */
+/*        plane rotations from the left, storing diagonal elements in D */
+/*        and off-diagonal elements in E */
+
+/* Computing MIN */
+	i__2 = *m - 1;
+	i__1 = f2cmin(i__2,*n);
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&ab[i__ * ab_dim1 + 1], &ab[i__ * ab_dim1 + 2], &rc, &rs, 
+		    &ra);
+	    d__[i__] = ra;
+	    if (i__ < *n) {
+		e[i__] = rs * ab[(i__ + 1) * ab_dim1 + 1];
+		ab[(i__ + 1) * ab_dim1 + 1] = rc * ab[(i__ + 1) * ab_dim1 + 1]
+			;
+	    }
+	    if (wantq) {
+		drot_(m, &q[i__ * q_dim1 + 1], &c__1, &q[(i__ + 1) * q_dim1 + 
+			1], &c__1, &rc, &rs);
+	    }
+	    if (wantc) {
+		drot_(ncc, &c__[i__ + c_dim1], ldc, &c__[i__ + 1 + c_dim1], 
+			ldc, &rc, &rs);
+	    }
+/* L100: */
+	}
+	if (*m <= *n) {
+	    d__[*m] = ab[*m * ab_dim1 + 1];
+	}
+    } else if (*ku > 0) {
+
+/*        A has been reduced to upper bidiagonal form */
+
+	if (*m < *n) {
+
+/*           Annihilate a(m,m+1) by applying plane rotations from the */
+/*           right, storing diagonal elements in D and off-diagonal */
+/*           elements in E */
+
+	    rb = ab[*ku + (*m + 1) * ab_dim1];
+	    for (i__ = *m; i__ >= 1; --i__) {
+		dlartg_(&ab[*ku + 1 + i__ * ab_dim1], &rb, &rc, &rs, &ra);
+		d__[i__] = ra;
+		if (i__ > 1) {
+		    rb = -rs * ab[*ku + i__ * ab_dim1];
+		    e[i__ - 1] = rc * ab[*ku + i__ * ab_dim1];
+		}
+		if (wantpt) {
+		    drot_(n, &pt[i__ + pt_dim1], ldpt, &pt[*m + 1 + pt_dim1], 
+			    ldpt, &rc, &rs);
+		}
+/* L110: */
+	    }
+	} else {
+
+/*           Copy off-diagonal elements to E and diagonal elements to D */
+
+	    i__1 = minmn - 1;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		e[i__] = ab[*ku + (i__ + 1) * ab_dim1];
+/* L120: */
+	    }
+	    i__1 = minmn;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		d__[i__] = ab[*ku + 1 + i__ * ab_dim1];
+/* L130: */
+	    }
+	}
+    } else {
+
+/*        A is diagonal. Set elements of E to zero and copy diagonal */
+/*        elements to D. */
+
+	i__1 = minmn - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    e[i__] = 0.;
+/* L140: */
+	}
+	i__1 = minmn;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    d__[i__] = ab[i__ * ab_dim1 + 1];
+/* L150: */
+	}
+    }
+    return 0;
+
+/*     End of DGBBRD */
+
+} /* dgbbrd_ */
+
diff --git a/lapack-netlib/SRC/dgbcon.c b/lapack-netlib/SRC/dgbcon.c
new file mode 100644
index 000000000..13759c77f
--- /dev/null
+++ b/lapack-netlib/SRC/dgbcon.c
@@ -0,0 +1,725 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* Fortran INTEGER is a plain C int in this translation */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* (re, im) pair; the name is available because the <complex.h> macro "complex" is #undef'd above */
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* double-precision counterpart of Cf */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* reinterprets the struct's storage as a C99 complex object */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* double-precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z))	/* assignable view of *z, used as the left-hand side by the macros below */
+#define pCd(z) (*_pCd(z))
+typedef int logical;	/* Fortran LOGICAL; see TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the hidden string-length arguments, e.g. the (ftnlen)6 passed to xerbla_ */
+typedef int ftnint;
+
+/* cilist: argument block for external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: argument block for internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: argument block for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: argument block for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: argument block for rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: argument block for inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group holding nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* NOTE: like most macros below, this evaluates its argument more than once */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* The c_ and z_ macros take pointers: the result is stored through R via pCf / pCd, operands are read via Cf / Cd. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars, stops at a NUL in B, adds no blank padding */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored: the message s is never printed */
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+/* NOTE(review): myceiling, myhuge and mymaxloc expand to a brace-wrapped expression, so they cannot be used as ordinary expressions - confirm how callers use them. */
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* Single-precision x**n for an integer exponent, by binary powering; n < 0 gives (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0f;
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows (undefined) for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Double-precision x**n for an integer exponent, by binary powering; n < 0 gives (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows (undefined) for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Single-precision complex x**n for an integer exponent, by binary powering; n < 0 gives (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows (undefined) for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Double-precision complex x**n for an integer exponent, by binary powering; n < 0 gives (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows (undefined) for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Integer power x**n; for n < 0 the exact value 1/x**|n| is truncated to an integer. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* |x| > 1 truncates to 0.  x == 0 keeps the original integer division by zero */
+		/* (NOTE(review): presumably a deliberate trap inherited from libf2c - confirm). */
+		return x == 0 ? 1/x : 0;
+	}
+	/* Here n > 0, or x == -1 with n < 0.  |n| is formed in unsigned arithmetic because */
+	/* "n = -n" overflows (undefined) for the most negative n. */
+	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* 1-based position, counted from s, of the first largest element among w(s..e); n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* 1-based position, counted from s, of the first largest element among w(s..e); n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Conjugated dot product in the style of BLAS CDOTC: *z = sum of conjg(x(i)) * y(i) over n */
+/* elements taken with strides incx / incy.  The result is returned through z. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: with a negative stride the first logical element is the last one in memory. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product in the style of BLAS ZDOTC: *z = sum of conjg(x(i)) * y(i) over n */
+/* elements taken with strides incx / incy.  The result is returned through z. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: with a negative stride the first logical element is the last one in memory. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product in the style of BLAS CDOTU: *z = sum of x(i) * y(i) over n */
+/* elements taken with strides incx / incy.  The result is returned through z. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: with a negative stride the first logical element is the last one in memory. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product in the style of BLAS ZDOTU: *z = sum of x(i) * y(i) over n */
+/* elements taken with strides incx / incy.  The result is returned through z. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: with a negative stride the first logical element is the last one in memory. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;	/* addressable constant 1; presumably passed by pointer to the Fortran-style routines later in this file - confirm */
+
+/* > \brief \b DGBCON */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBCON + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbcon.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbcon.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbcon.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, */
+/*                          WORK, IWORK, INFO ) */
+
+/*       CHARACTER          NORM */
+/*       INTEGER            INFO, KL, KU, LDAB, N */
+/*       DOUBLE PRECISION   ANORM, RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBCON estimates the reciprocal of the condition number of a real */
+/* > general band matrix A, in either the 1-norm or the infinity-norm, */
+/* > using the LU factorization computed by DGBTRF. */
+/* > */
+/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
+/* > condition number is computed as */
+/* >    RCOND = 1 / ( norm(A) * norm(inv(A)) ). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] NORM */
+/* > \verbatim */
+/* >          NORM is CHARACTER*1 */
+/* >          Specifies whether the 1-norm condition number or the */
+/* >          infinity-norm condition number is required: */
+/* >          = '1' or 'O':  1-norm; */
+/* >          = 'I':         Infinity-norm. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          Details of the LU factorization of the band matrix A, as */
+/* >          computed by DGBTRF.  U is stored as an upper triangular band */
+/* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
+/* >          the multipliers used during the factorization are stored in */
+/* >          rows KL+KU+2 to 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= N, row i of the matrix was */
+/* >          interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ANORM */
+/* > \verbatim */
+/* >          ANORM is DOUBLE PRECISION */
+/* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
+/* >          If NORM = 'I', the infinity-norm of the original matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >          The reciprocal of the condition number of the matrix A, */
+/* >          computed as RCOND = 1/(norm(A) * norm(inv(A))). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (3*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbcon_(char *norm, integer *n, integer *kl, integer *ku,
+	 doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, 
+	doublereal *rcond, doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Local variables */
+    integer kase;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer kase1, j;
+    doublereal t, scale;
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    logical lnoti;
+    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *), dlacn2_(integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    integer *);
+    integer kd;
+    extern doublereal dlamch_(char *);
+    integer lm, jp, ix;
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlatbs_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
+    doublereal ainvnm;
+    logical onenrm;
+    char normin[1];
+    doublereal smlnum;
+/*  Estimates RCOND = 1/(norm(A) * norm(inv(A))) for a band matrix, using the */
+/*  factor data in AB and IPIV and the caller-supplied norm ANORM. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*  The function value is always 0; a rejected argument i gives *info = -i. */
+/*  NOTE(review): info is also passed on to dlatbs_ -- confirm it stays 0 there. */
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments: shift the pointers so 1-based indices can be used */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    --ipiv;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
+    if (! onenrm && ! lsame_(norm, "I")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*ldab < (*kl << 1) + *ku + 1) {
+	*info = -6;
+    } else if (*anorm < 0.) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGBCON", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    *rcond = 0.;
+    if (*n == 0) {
+	*rcond = 1.;
+	return 0;
+    } else if (*anorm == 0.) {
+	return 0;
+    }
+
+    smlnum = dlamch_("Safe minimum");
+
+/*     Estimate the norm of inv(A). */
+/*     Each pass from L10 solves with L then U (kase == kase1) or U**T then L**T. */
+    ainvnm = 0.;
+    *(unsigned char *)normin = 'N';
+    if (onenrm) {
+	kase1 = 1;
+    } else {
+	kase1 = 2;
+    }
+    kd = *kl + *ku + 1;
+    lnoti = *kl > 0;
+    kase = 0;
+L10:
+    dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
+    if (kase != 0) {
+	if (kase == kase1) {
+
+/*           Multiply by inv(L). */
+
+	    if (lnoti) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		    i__2 = *kl, i__3 = *n - j;
+		    lm = f2cmin(i__2,i__3);
+		    jp = ipiv[j];
+		    t = work[jp];
+		    if (jp != j) {
+			work[jp] = work[j];
+			work[j] = t;
+		    }
+		    d__1 = -t;
+		    daxpy_(&lm, &d__1, &ab[kd + 1 + j * ab_dim1], &c__1, &
+			    work[j + 1], &c__1);
+/* L20: */
+		}
+	    }
+
+/*           Multiply by inv(U). */
+
+	    i__1 = *kl + *ku;
+	    dlatbs_("Upper", "No transpose", "Non-unit", normin, n, &i__1, &
+		    ab[ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 
+		    1], info);
+	} else {
+
+/*           Multiply by inv(U**T). */
+
+	    i__1 = *kl + *ku;
+	    dlatbs_("Upper", "Transpose", "Non-unit", normin, n, &i__1, &ab[
+		    ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], 
+		    info);
+
+/*           Multiply by inv(L**T). */
+
+	    if (lnoti) {
+		for (j = *n - 1; j >= 1; --j) {
+/* Computing MIN */
+		    i__1 = *kl, i__2 = *n - j;
+		    lm = f2cmin(i__1,i__2);
+		    work[j] -= ddot_(&lm, &ab[kd + 1 + j * ab_dim1], &c__1, &
+			    work[j + 1], &c__1);
+		    jp = ipiv[j];
+		    if (jp != j) {
+			t = work[jp];
+			work[jp] = work[j];
+			work[j] = t;
+		    }
+/* L30: */
+		}
+	    }
+	}
+
+/*        Multiply X by 1/SCALE (drscl_) if doing so will not cause overflow; */
+/*        otherwise jump to L40 and return with *rcond still 0. */
+	*(unsigned char *)normin = 'Y';
+	if (scale != 1.) {
+	    ix = idamax_(n, &work[1], &c__1);
+	    if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) 
+		    {
+		goto L40;
+	    }
+	    drscl_(n, &scale, &work[1], &c__1);
+	}
+	goto L10;
+    }
+
+/*     Compute the estimate of the reciprocal condition number. */
+
+    if (ainvnm != 0.) {
+	*rcond = 1. / ainvnm / *anorm;
+    }
+
+L40:
+    return 0;
+
+/*     End of DGBCON */
+
+} /* dgbcon_ */
+
diff --git a/lapack-netlib/SRC/dgbequ.c b/lapack-netlib/SRC/dgbequ.c
new file mode 100644
index 000000000..f96f45574
--- /dev/null
+++ b/lapack-netlib/SRC/dgbequ.c
@@ -0,0 +1,764 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* builds a native complex value from the r/i fields */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* double-precision counterpart of Cf */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage, viewed through a native complex pointer */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* double-precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z)) /* assignable native-complex view of *z */
+#define pCd(z) (*_pCd(z)) /* assignable native-complex view of *z */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n for an integer exponent by repeated squaring; x**0 is 1 for every x. */
+static float spow_ui(float x, integer n) {
+	float pow = 1.0;
+	/* |n| in unsigned arithmetic: -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative power: work with the reciprocal base */
+	while (u != 0) {
+		if (u & 01) pow *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return pow;
+}
+/* x**n for an integer exponent by repeated squaring; x**0 is 1 for every x. */
+static double dpow_ui(double x, integer n) {
+	double pow = 1.0;
+	/* |n| in unsigned arithmetic: -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative power: work with the reciprocal base */
+	while (u != 0) {
+		if (u & 01) pow *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return pow;
+}
+/* x**n for an integer exponent by repeated squaring; x**0 is 1 for every x. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow = 1.0;
+	/* |n| in unsigned arithmetic: -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative power: work with the reciprocal base */
+	while (u != 0) {
+		if (u & 01) pow *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return pow;
+}
+/* x**n for an integer exponent by repeated squaring; x**0 is 1 for every x. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow = 1.0;
+	/* |n| in unsigned arithmetic: -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative power: work with the reciprocal base */
+	while (u != 0) {
+		if (u & 01) pow *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return pow;
+}
+/* Integer power x**n; a negative n yields the truncated integer quotient 1/(x**|n|). */
+static integer pow_ii(integer x, integer n) {
+	integer pow = 1; unsigned long int u = n;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0) {
+		/* |x| > 1 truncates to 0; x == 0 still divides by zero, as it did before. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* x == -1: take |n| in unsigned arithmetic, since -n overflows for INT_MIN. */
+		u = 0UL - (unsigned long int)n;
+	}
+	/* Repeated squaring: one exponent bit is consumed per pass. */
+	for (; u != 0; ) {
+		if (u & 01) pow *= x;
+		if (u >>= 1) x *= x;
+	}
+	return pow;
+}
+/* Position, counted from 1 at index s, of the first largest w(k) for k = s..e; n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	integer best = s, k = s;
+	while (++k <= e) if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Position, counted from 1 at index s, of the first largest w(k) for k = s..e; n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	integer best = s, k = s;
+	while (++k <= e) if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* *z = sum over i of conj(x_i) * y_i for n elements taken at strides incx, incy. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards (BLAS convention), so start */
+	/* at the far end instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	/* With n <= 0 the loop is skipped and zero is stored. */
+	pCf(z) = acc;
+}
+/* *z = sum over i of conj(x_i) * y_i for n elements taken at strides incx, incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards (BLAS convention), so start */
+	/* at the far end instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	/* With n <= 0 the loop is skipped and zero is stored. */
+	pCd(z) = acc;
+}
+/* *z = sum over i of x_i * y_i (no conjugation) for n elements at strides incx, incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards (BLAS convention), so start */
+	/* at the far end instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	/* With n <= 0 the loop is skipped and zero is stored. */
+	pCf(z) = acc;
+}
+/* *z = sum over i of x_i * y_i (no conjugation) for n elements at strides incx, incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards (BLAS convention), so start */
+	/* at the far end instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	/* With n <= 0 the loop is skipped and zero is stored. */
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGBEQU */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBEQU + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbequ.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbequ.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbequ.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, */
+/*                          AMAX, INFO ) */
+
+/*       INTEGER            INFO, KL, KU, LDAB, M, N */
+/*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
+/*       DOUBLE PRECISION   AB( LDAB, * ), C( * ), R( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBEQU computes row and column scalings intended to equilibrate an */
+/* > M-by-N band matrix A and reduce its condition number.  R returns the */
+/* > row scale factors and C the column scale factors, chosen to try to */
+/* > make the largest element in each row and column of the matrix B with */
+/* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */
+/* > */
+/* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */
+/* > number and BIGNUM = largest safe number.  Use of these scaling */
+/* > factors is not guaranteed to reduce the condition number of A but */
+/* > works well in practice. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th */
+/* >          column of A is stored in the j-th column of the array AB as */
+/* >          follows: */
+/* >          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (M) */
+/* >          If INFO = 0, or INFO > M, R contains the row scale factors */
+/* >          for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >          If INFO = 0, C contains the column scale factors for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ROWCND */
+/* > \verbatim */
+/* >          ROWCND is DOUBLE PRECISION */
+/* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
+/* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
+/* >          AMAX is neither too large nor too small, it is not worth */
+/* >          scaling by R. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] COLCND */
+/* > \verbatim */
+/* >          COLCND is DOUBLE PRECISION */
+/* >          If INFO = 0, COLCND contains the ratio of the smallest */
+/* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
+/* >          worth scaling by C. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] AMAX */
+/* > \verbatim */
+/* >          AMAX is DOUBLE PRECISION */
+/* >          Absolute value of largest matrix element.  If AMAX is very */
+/* >          close to overflow or very close to underflow, the matrix */
+/* >          should be scaled. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, and i is */
+/* >                <= M:  the i-th row of A is exactly zero */
+/* >                >  M:  the (i-M)-th column of A is exactly zero */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/*  ===================================================================== */
+/*  dgbequ_ -- equilibration scalings for an M-by-N band matrix. */
+/* */
+/*  Arguments (all passed by reference, Fortran style): */
+/*    m, n     in   row and column counts; negative values are rejected */
+/*    kl, ku   in   sub- and superdiagonal counts; negative values are */
+/*                  rejected */
+/*    ab       in   band storage; A(i,j), 1-based, is read from */
+/*                  ab[(ku + i - j) + (j - 1)*ldab] */
+/*    ldab     in   column stride of ab; must be at least kl + ku + 1 */
+/*    r__      out  m row scale factors */
+/*    c__      out  n column scale factors */
+/*    rowcnd   out  clamped smallest row maximum / clamped largest one */
+/*    colcnd   out  the same ratio for the row-scaled column maxima */
+/*    amax     out  largest absolute value among the band entries read */
+/*    info     out  status code, see below */
+/* */
+/*  The function value is always 0; the outcome is reported in *info: */
+/*     0          success */
+/*    -k          argument k was rejected (xerbla_ has been called) */
+/*     i <= M     the maximum of row i is exactly zero */
+/*     M + j      the row-scaled maximum of column j is exactly zero */
+/* */
+/*  On a positive *info the routine returns at once: *rowcnd is not written */
+/*  when a zero row is reported, and *colcnd is not written in either case. */
+/*  After a zero-row report r__ still holds the row maxima, not scale */
+/*  factors. */
+/* Subroutine */ int dgbequ_(integer *m, integer *n, integer *kl, integer *ku,
+	doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__,
+	doublereal *rowcnd, doublereal *colcnd, doublereal *amax,
+	integer *info)
+{
+    /* External routines */
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+    /* Argument checking */
+    integer argpos = 0;	/* position of the first rejected argument */
+
+    /* Problem dimensions */
+    integer rows;
+    integer cols;
+    integer nsub;
+    integer nsuper;
+    integer lda;
+
+    /* Loop state */
+    integer row;
+    integer col;
+    integer first;
+    integer last;
+    doublereal entry;
+    doublereal mag;
+    doublereal floored;
+
+    /* Clamp limits and running extremes */
+    doublereal tiny;
+    doublereal big;
+    doublereal lo;
+    doublereal hi;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Validate the arguments in order; only the first failure is reported. */
+
+    *info = 0;
+    if (*m < 0) {
+	argpos = 1;
+    } else if (*n < 0) {
+	argpos = 2;
+    } else if (*kl < 0) {
+	argpos = 3;
+    } else if (*ku < 0) {
+	argpos = 4;
+    } else if (*ldab < *kl + *ku + 1) {
+	argpos = 6;
+    }
+    if (argpos != 0) {
+	*info = -argpos;
+	xerbla_("DGBEQU", &argpos, (ftnlen)6);
+	return 0;
+    }
+
+/*     Copy the dimensions now that they are known to be valid. */
+    rows = *m;
+    cols = *n;
+    nsub = *kl;
+    nsuper = *ku;
+    lda = *ldab;
+
+/*     Nothing to scale in an empty matrix. */
+
+    if (rows == 0 || cols == 0) {
+	*rowcnd = 1.;
+	*colcnd = 1.;
+	*amax = 0.;
+	return 0;
+    }
+
+/*     Clamp limits: the value dlamch_("S") returns and its reciprocal. */
+
+    tiny = dlamch_("S");
+    big = 1. / tiny;
+
+/*     Row pass: r__ accumulates the largest |A(i,j)| found in each row. */
+
+    for (row = 1; row <= rows; ++row) {
+	r__[row - 1] = 0.;
+    }
+
+    for (col = 1; col <= cols; ++col) {
+
+/*        Rows of this column that lie inside both the band and the matrix. */
+
+	first = f2cmax(col - nsuper, 1);
+	last = f2cmin(col + nsub, rows);
+
+	for (row = first; row <= last; ++row) {
+	    entry = ab[(nsuper + row - col) + (col - 1) * lda];
+	    mag = abs(entry);
+	    r__[row - 1] = f2cmax(r__[row - 1], mag);
+	}
+    }
+
+/*     Extremes of the row maxima; the largest one is AMAX. */
+
+    lo = big;
+    hi = 0.;
+    for (row = 1; row <= rows; ++row) {
+	hi = f2cmax(hi, r__[row - 1]);
+	lo = f2cmin(lo, r__[row - 1]);
+    }
+    *amax = hi;
+
+    if (lo == 0.) {
+
+/*        Report the first row whose maximum is exactly zero. */
+
+	for (row = 1; row <= rows; ++row) {
+	    if (r__[row - 1] == 0.) {
+		*info = row;
+		return 0;
+	    }
+	}
+    } else {
+
+/*        Turn each maximum into a scale factor: 1 / clamp(max, tiny, big). */
+
+	for (row = 1; row <= rows; ++row) {
+	    floored = f2cmax(r__[row - 1], tiny);
+	    r__[row - 1] = 1. / f2cmin(floored, big);
+	}
+
+/*        ROWCND = clamped smallest maximum / clamped largest maximum. */
+
+	*rowcnd = f2cmax(lo, tiny) / f2cmin(hi, big);
+    }
+
+/*     Column pass: c__ accumulates the largest |A(i,j)| * r__(i) in each */
+/*     column, using r__ as the row pass left it. */
+
+    for (col = 1; col <= cols; ++col) {
+	c__[col - 1] = 0.;
+    }
+
+    for (col = 1; col <= cols; ++col) {
+
+/*        Same row window as in the row pass. */
+
+	first = f2cmax(col - nsuper, 1);
+	last = f2cmin(col + nsub, rows);
+
+	for (row = first; row <= last; ++row) {
+	    entry = ab[(nsuper + row - col) + (col - 1) * lda];
+	    mag = abs(entry) * r__[row - 1];
+	    c__[col - 1] = f2cmax(c__[col - 1], mag);
+	}
+    }
+
+/*     Extremes of the column maxima. */
+
+    lo = big;
+    hi = 0.;
+    for (col = 1; col <= cols; ++col) {
+	lo = f2cmin(lo, c__[col - 1]);
+	hi = f2cmax(hi, c__[col - 1]);
+    }
+
+    if (lo == 0.) {
+
+/*        Report the first zero column maximum as M + its column index. */
+
+	for (col = 1; col <= cols; ++col) {
+	    if (c__[col - 1] == 0.) {
+		*info = rows + col;
+		return 0;
+	    }
+	}
+    } else {
+
+/*        Same clamped inversion as for the rows. */
+
+	for (col = 1; col <= cols; ++col) {
+	    floored = f2cmax(c__[col - 1], tiny);
+	    c__[col - 1] = 1. / f2cmin(floored, big);
+	}
+
+/*        COLCND = clamped smallest maximum / clamped largest maximum. */
+
+	*colcnd = f2cmax(lo, tiny) / f2cmin(hi, big);
+    }
+
+    return 0;
+
+/*     End of DGBEQU */
+
+} /* dgbequ_ */
+
diff --git a/lapack-netlib/SRC/dgbequb.c b/lapack-netlib/SRC/dgbequb.c
new file mode 100644
index 000000000..906e4ed41
--- /dev/null
+++ b/lapack-netlib/SRC/dgbequb.c
@@ -0,0 +1,783 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* builds a native complex value from the r/i fields */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* double-precision counterpart of Cf */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage, viewed through a native complex pointer */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* double-precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z)) /* assignable native-complex view of *z */
+#define pCd(z) (*_pCd(z)) /* assignable native-complex view of *z */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* constant is 1/ln(10) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* nearest integer, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a|, negated when b < 0 */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* B is a doublecomplex*, like A: convert it with Cd(); a bare *(B) hands cpow a struct */
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+/* mymaxloc forwards to dmaxloc_ (double data only); s and e are passed as pointers and dereferenced here. */
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++: L_fp points to a function returning logical */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++ build: explicitly variadic parameter list */
+#else
+typedef logical (*L_fp)();	/* C build: parameter list left unspecified */
+#endif
+
+/* Returns x**n by square-and-multiply; for n < 0 the result is (1/x)**|n|, and x**0 is 1. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Returns x**n by square-and-multiply; for n < 0 the result is (1/x)**|n|, and x**0 is 1. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Returns the complex power x**n by square-and-multiply; for n < 0 it is (1/x)**|n|, and x**0 is 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Returns the complex power x**n by square-and-multiply; for n < 0 it is (1/x)**|n|, and x**0 is 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Integer power x**n.  For n <= 0: x**0 and 1**n give 1, (-1)**n is computed from |n|, */
+/* and every other base with n < 0 gives 0.  NOTE(review): x == 0 with n < 0 evaluates 1/x, */
+/* an integer division by zero; kept as in the original -- confirm it is a deliberate trap. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;
+	}
+	for (u = n; ; ) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+/* Position (1-based, counted from s) of the first largest value among w[s-1] .. w[e-1]; n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++) if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest value among w[s-1] .. w[e-1]; n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++) if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotu;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGBEQUB */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBEQUB + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbequb
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbequb
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbequb
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, */
+/*                           AMAX, INFO ) */
+
+/*       INTEGER            INFO, KL, KU, LDAB, M, N */
+/*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
+/*       DOUBLE PRECISION   AB( LDAB, * ), C( * ), R( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBEQUB computes row and column scalings intended to equilibrate an */
+/* > M-by-N matrix A and reduce its condition number.  R returns the row */
+/* > scale factors and C the column scale factors, chosen to try to make */
+/* > the largest element in each row and column of the matrix B with */
+/* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */
+/* > the radix. */
+/* > */
+/* > R(i) and C(j) are restricted to be a power of the radix between */
+/* > SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use */
+/* > of these scaling factors is not guaranteed to reduce the condition */
+/* > number of A but works well in practice. */
+/* > */
+/* > This routine differs from DGEEQU by restricting the scaling factors */
+/* > to a power of the radix.  Barring over- and underflow, scaling by */
+/* > these factors introduces no additional rounding errors.  However, the */
+/* > scaled entries' magnitudes are no longer approximately 1 but lie */
+/* > between sqrt(radix) and 1/sqrt(radix). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
+/* >          The j-th column of A is stored in the j-th column of the */
+/* >          array AB as follows: */
+/* >          AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(M,j+KL) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (M) */
+/* >          If INFO = 0 or INFO > M, R contains the row scale factors */
+/* >          for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >          If INFO = 0,  C contains the column scale factors for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ROWCND */
+/* > \verbatim */
+/* >          ROWCND is DOUBLE PRECISION */
+/* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
+/* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
+/* >          AMAX is neither too large nor too small, it is not worth */
+/* >          scaling by R. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] COLCND */
+/* > \verbatim */
+/* >          COLCND is DOUBLE PRECISION */
+/* >          If INFO = 0, COLCND contains the ratio of the smallest */
+/* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
+/* >          worth scaling by C. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] AMAX */
+/* > \verbatim */
+/* >          AMAX is DOUBLE PRECISION */
+/* >          Absolute value of largest matrix element.  If AMAX is very */
+/* >          close to overflow or very close to underflow, the matrix */
+/* >          should be scaled. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i,  and i is */
+/* >                <= M:  the i-th row of A is exactly zero */
+/* >                >  M:  the (i-M)-th column of A is exactly zero */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbequb_(integer *m, integer *n, integer *kl, integer *
+	ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, 
+	doublereal *rowcnd, doublereal *colcnd, doublereal *amax, integer *
+	info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1, d__2, d__3;
+
+    /* Local variables */
+    integer i__, j;
+    doublereal radix, rcmin, rcmax;
+    integer kd;
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    doublereal bignum, logrdx, smlnum;
+/*  Outline: check the arguments, derive power-of-radix row scales R from the */
+/*  row maxima of |A|, then column scales C from the row-scaled column maxima. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+/*  Every path returns 0; the status is reported to the caller through *info. */
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments: re-base the pointers so 1-based Fortran indices work below */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    --r__;
+    --c__;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*ldab < *kl + *ku + 1) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGBEQUB", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible (R and C are not written in this case). */
+
+    if (*m == 0 || *n == 0) {
+	*rowcnd = 1.;
+	*colcnd = 1.;
+	*amax = 0.;
+	return 0;
+    }
+
+/*     Get machine constants.  Assume SMLNUM is a power of the radix. */
+
+    smlnum = dlamch_("S");
+    bignum = 1. / smlnum;
+    radix = dlamch_("B");
+    logrdx = log(radix); /* divisor that turns natural logs into base-radix exponents */
+
+/*     Compute row scale factors. */
+
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__[i__] = 0.;
+/* L10: */
+    }
+
+/*     Find the maximum element in each row. */
+
+    kd = *ku + 1; /* row of AB holding the diagonal: A(i,j) is ab[kd + i - j + j*ab_dim1] */
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MAX: first row of column j inside the band is max(j-ku, 1) */
+	i__2 = j - *ku;
+/* Computing MIN: last row of column j inside the band is min(j+kl, m) */
+	i__4 = j + *kl;
+	i__3 = f2cmin(i__4,*m);
+	for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) {
+/* Computing MAX: running maximum of |A(i,j)| for row i */
+	    d__2 = r__[i__], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], 
+		    abs(d__1));
+	    r__[i__] = f2cmax(d__2,d__3);
+/* L20: */
+	}
+/* L30: */
+    }
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (r__[i__] > 0.) {
+	    i__3 = (integer) (log(r__[i__]) / logrdx); /* exponent, truncated toward zero by the cast */
+	    r__[i__] = pow_di(&radix, &i__3);
+	}
+    }
+
+/*     Find the maximum and minimum scale factors. */
+
+    rcmin = bignum;
+    rcmax = 0.;
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__1 = rcmax, d__2 = r__[i__];
+	rcmax = f2cmax(d__1,d__2);
+/* Computing MIN */
+	d__1 = rcmin, d__2 = r__[i__];
+	rcmin = f2cmin(d__1,d__2);
+/* L40: */
+    }
+    *amax = rcmax; /* largest row maximum after the rounding to a power of the radix above */
+
+    if (rcmin == 0.) {
+
+/*        Find the first zero scale factor and return an error code. */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (r__[i__] == 0.) {
+		*info = i__;
+		return 0; /* rowcnd, colcnd and c are not set on this path */
+	    }
+/* L50: */
+	}
+    } else {
+
+/*        Invert the scale factors. */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Clamp the row value into [smlnum, bignum] (MAX first, then MIN) */
+/* and store its reciprocal as the row scale factor. */
+	    d__2 = r__[i__];
+	    d__1 = f2cmax(d__2,smlnum);
+	    r__[i__] = 1. / f2cmin(d__1,bignum);
+/* L60: */
+	}
+
+/*        Compute ROWCND = min(R(I)) / max(R(I)). */
+
+	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+/*     Compute column scale factors. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	c__[j] = 0.;
+/* L70: */
+    }
+
+/*     Find the maximum element in each column, */
+/*     assuming the row scaling computed above. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MAX: first row of column j inside the band is max(j-ku, 1) */
+	i__3 = j - *ku;
+/* Computing MIN: last row of column j inside the band is min(j+kl, m) */
+	i__4 = j + *kl;
+	i__2 = f2cmin(i__4,*m);
+	for (i__ = f2cmax(i__3,1); i__ <= i__2; ++i__) {
+/* Computing MAX: running maximum of |A(i,j)| * R(i) for column j */
+	    d__2 = c__[j], d__3 = (d__1 = ab[kd + i__ - j + j * ab_dim1], abs(
+		    d__1)) * r__[i__];
+	    c__[j] = f2cmax(d__2,d__3);
+/* L80: */
+	}
+	if (c__[j] > 0.) {
+	    i__2 = (integer) (log(c__[j]) / logrdx); /* exponent, truncated toward zero by the cast */
+	    c__[j] = pow_di(&radix, &i__2);
+	}
+/* L90: */
+    }
+
+/*     Find the maximum and minimum scale factors. */
+
+    rcmin = bignum;
+    rcmax = 0.;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	d__1 = rcmin, d__2 = c__[j];
+	rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+	d__1 = rcmax, d__2 = c__[j];
+	rcmax = f2cmax(d__1,d__2);
+/* L100: */
+    }
+
+    if (rcmin == 0.) {
+
+/*        Find the first zero scale factor and return an error code. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (c__[j] == 0.) {
+		*info = *m + j;
+		return 0; /* colcnd is not set on this path */
+	    }
+/* L110: */
+	}
+    } else {
+
+/*        Invert the scale factors. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Clamp the column value into [smlnum, bignum] (MAX first, then MIN) */
+/* and store its reciprocal as the column scale factor. */
+	    d__2 = c__[j];
+	    d__1 = f2cmax(d__2,smlnum);
+	    c__[j] = 1. / f2cmin(d__1,bignum);
+/* L120: */
+	}
+
+/*        Compute COLCND = min(C(J)) / max(C(J)). */
+
+	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+    return 0;
+
+/*     End of DGBEQUB */
+
+} /* dgbequb_ */
+
diff --git a/lapack-netlib/SRC/dgbrfs.c b/lapack-netlib/SRC/dgbrfs.c
new file mode 100644
index 000000000..14984b885
--- /dev/null
+++ b/lapack-netlib/SRC/dgbrfs.c
@@ -0,0 +1,919 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* C stand-ins for the scalar types used by the translated Fortran code */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* r = real part, i = imaginary part (read that way by Cf) */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision counterpart (read by Cd) */
+static inline _Complex float Cf(complex *z) {float re = z->r, im = z->i; return re + im*_Complex_I;}	/* struct -> C complex value */
+static inline _Complex double Cd(doublecomplex *z) {double re = z->r, im = z->i; return re + im*_Complex_I;}	/* double-precision variant */
+static inline _Complex float * _pCf(complex *z) {_Complex float *view = (_Complex float*)z; return view;}	/* same storage, seen as a C complex */
+static inline _Complex double * _pCd(doublecomplex *z) {_Complex double *view = (_Complex double*)z; return view;}	/* double-precision variant */
+#define pCf(z) (*_pCf(z))	/* assignable lvalue over the caller's struct */
+#define pCd(z) (*_pCd(z))	/* double-precision variant */
+typedef int logical;	/* truth values are plain ints; see TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it expands to plain extern unless defined beforehand */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: integer aliases used by the I/O parameter structures that follow */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: parameter block for external read/write; field roles are only suggested by their names -- confirm */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: parameter block for internal read/write; here the unit (iciunit) is a char pointer, not a number */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: parameter block for open; ofnm comes with an explicit length (ofnmlen), the other strings do not */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: parameter block for close (a flag, a unit number and one string; roles inferred from names -- confirm) */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: parameter block shared by rewind, backspace and endfile (a flag and a unit number) */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: parameter block for inquire; most result strings are paired with an explicit length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): the only length declared ftnint, not ftnlen; both are int here */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type declared above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; -- left disabled; no longint type is declared in the part of the header shown here */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable; a Namelist holds an array of pointers to these */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of variable descriptors */
+	char *name;
+	Vardesc **vars;	/* presumably nvars entries -- confirm against the code that fills it */
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* Macro stand-ins for runtime helpers: arguments are pointers; complex results are stored through pCf/pCd. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* constant is 1/ln(10) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* nearest integer, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a|, negated when b < 0 */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* B is a doublecomplex*, like A: convert it with Cd(); a bare *(B) hands cpow a struct */
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+/* mymaxloc forwards to dmaxloc_ (double data only); s and e are passed as pointers and dereferenced here. */
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++: L_fp points to a function returning logical */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++ build: explicitly variadic parameter list */
+#else
+typedef logical (*L_fp)();	/* C build: parameter list left unspecified */
+#endif
+
+/* Returns x**n by square-and-multiply; for n < 0 the result is (1/x)**|n|, and x**0 is 1. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Returns x**n by square-and-multiply; for n < 0 the result is (1/x)**|n|, and x**0 is 1. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Returns the complex power x**n by square-and-multiply; for n < 0 it is (1/x)**|n|, and x**0 is 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Returns the complex power x**n by square-and-multiply; for n < 0 it is (1/x)**|n|, and x**0 is 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	/* Form |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }
+	while (u != 0) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;	/* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Integer power x**n.  For n <= 0: x**0 and 1**n give 1, (-1)**n is computed from |n|, */
+/* and every other base with n < 0 gives 0.  NOTE(review): x == 0 with n < 0 evaluates 1/x, */
+/* an integer division by zero; kept as in the original -- confirm it is a deliberate trap. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;
+	}
+	for (u = n; ; ) {
+		if (u & 01) acc *= x;
+		if (u >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+/* Position (1-based, counted from s) of the first largest value among w[s-1] .. w[e-1]; n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++) if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest value among w[s-1] .. w[e-1]; n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++) if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotu;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with */
+/* strides incx and incy; n <= 0 yields zero. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride traverses the vector from its far end, as in the reference BLAS; */
+	/* start at offset (1-n)*inc rather than indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values: file-scope objects, presumably so literals can be passed by address -- confirm at the call sites */
+
+static integer c__1 = 1;
+static doublereal c_b15 = -1.;
+static doublereal c_b17 = 1.;
+
+/* > \brief \b DGBRFS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBRFS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbrfs.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbrfs.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbrfs.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, */
+/*                          IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, */
+/*                          INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */
+/*      $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBRFS improves the computed solution to a system of linear */
+/* > equations when the coefficient matrix is banded, and provides */
+/* > error bounds and backward error estimates for the solution. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          The original band matrix A, stored in rows 1 to KL+KU+1. */
+/* >          The j-th column of A is stored in the j-th column of the */
+/* >          array AB as follows: */
+/* >          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AFB */
+/* > \verbatim */
+/* >          AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */
+/* >          Details of the LU factorization of the band matrix A, as */
+/* >          computed by DGBTRF.  U is stored as an upper triangular band */
+/* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
+/* >          the multipliers used during the factorization are stored in */
+/* >          rows KL+KU+2 to 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAFB */
+/* > \verbatim */
+/* >          LDAFB is INTEGER */
+/* >          The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices from DGBTRF; for 1<=i<=N, row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          The right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >          On entry, the solution matrix X, as computed by DGBTRS. */
+/* >          On exit, the improved solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (3*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/* > \par Internal Parameters: */
+/*  ========================= */
+/* > */
+/* > \verbatim */
+/* >  ITMAX is the maximum number of steps of iterative refinement. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbrfs_(char *trans, integer *n, integer *kl, integer *
+	ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, 
+	integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, 
+	doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, 
+	doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
+	    x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
+    doublereal d__1, d__2, d__3;
+
+    /* Local variables */
+    integer kase;
+    doublereal safe1, safe2;
+    integer i__, j, k;
+    doublereal s;
+    extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer *
+	    , integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *), daxpy_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *);
+    integer count;
+    extern /* Subroutine */ int dlacn2_(integer *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, integer *);
+    integer kk;
+    extern doublereal dlamch_(char *);
+    doublereal xk;
+    integer nz;
+    doublereal safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dgbtrs_(
+	    char *, integer *, integer *, integer *, integer *, doublereal *, 
+	    integer *, integer *, doublereal *, integer *, integer *);
+    logical notran;
+    char transt[1];
+    doublereal lstres, eps;
+/* Refines each column X(:,j) in place and fills FERR(j) and BERR(j).  Every exit */
+/* path returns 0; invalid arguments are reported through *info and xerbla_. */
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/* Workspace (1-based after --work): work[1..n] holds abs(op(A))*abs(X)+abs(B), later W; */
+/* work[n+1..2n] holds the residual; work[2n+1..3n] is dlacn2_'s first array argument. */
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    afb_dim1 = *ldafb;
+    afb_offset = 1 + afb_dim1 * 1;
+    afb -= afb_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*nrhs < 0) {
+	*info = -5;
+    } else if (*ldab < *kl + *ku + 1) {
+	*info = -7;
+    } else if (*ldafb < (*kl << 1) + *ku + 1) { /* i.e. LDAFB < 2*KL+KU+1 */
+	*info = -9;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -12;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -14;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGBRFS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    ferr[j] = 0.;
+	    berr[j] = 0.;
+/* L10: */
+	}
+	return 0;
+    }
+
+    if (notran) {
+	*(unsigned char *)transt = 'T'; /* transt is the opposite of trans, used for the kase == 1 solve */
+    } else {
+	*(unsigned char *)transt = 'N';
+    }
+
+/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */
+
+/* Computing MIN */
+    i__1 = *kl + *ku + 2, i__2 = *n + 1;
+    nz = f2cmin(i__1,i__2);
+    eps = dlamch_("Epsilon");
+    safmin = dlamch_("Safe minimum");
+    safe1 = nz * safmin;
+    safe2 = safe1 / eps;
+
+/*     Do for each right hand side */
+
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+
+	count = 1; /* refinement pass counter for column j */
+	lstres = 3.; /* BERR of the previous pass, seeded with 3 */
+L20:
+
+/*        Loop until stopping criterion is satisfied. */
+
+/*        Compute residual R = B - op(A) * X, */
+/*        where op(A) = A, A**T, or A**H, depending on TRANS. */
+
+	dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
+	dgbmv_(trans, n, n, kl, ku, &c_b15, &ab[ab_offset], ldab, &x[j * 
+		x_dim1 + 1], &c__1, &c_b17, &work[*n + 1], &c__1);
+/*        NOTE(review): c_b15 and c_b17 are file-scope constants defined outside this view (presumably -1. and 1.). */
+/*        Compute componentwise relative backward error from formula */
+
+/*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */
+
+/*        where abs(Z) is the componentwise absolute value of the matrix */
+/*        or vector Z.  If the i-th component of the denominator is less */
+/*        than SAFE2, then SAFE1 is added to the i-th components of the */
+/*        numerator and denominator before dividing. */
+
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
+/* L30: */
+	}
+
+/*        Compute abs(op(A))*abs(X) + abs(B). */
+
+	if (notran) {
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		kk = *ku + 1 - k; /* band-storage row offset: ab[kk + i + k*ab_dim1] is A(i,k) */
+		xk = (d__1 = x[k + j * x_dim1], abs(d__1));
+/* Computing MAX */
+		i__3 = 1, i__4 = k - *ku;
+/* Computing MIN */
+		i__6 = *n, i__7 = k + *kl;
+		i__5 = f2cmin(i__6,i__7);
+		for (i__ = f2cmax(i__3,i__4); i__ <= i__5; ++i__) {
+		    work[i__] += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1)
+			    ) * xk;
+/* L40: */
+		}
+/* L50: */
+	    }
+	} else {
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		s = 0.;
+		kk = *ku + 1 - k;
+/* Computing MAX */
+		i__5 = 1, i__3 = k - *ku;
+/* Computing MIN */
+		i__6 = *n, i__7 = k + *kl;
+		i__4 = f2cmin(i__6,i__7);
+		for (i__ = f2cmax(i__5,i__3); i__ <= i__4; ++i__) {
+		    s += (d__1 = ab[kk + i__ + k * ab_dim1], abs(d__1)) * (
+			    d__2 = x[i__ + j * x_dim1], abs(d__2));
+/* L60: */
+		}
+		work[k] += s;
+/* L70: */
+	    }
+	}
+	s = 0.;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+/* Computing MAX */
+		d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[
+			i__];
+		s = f2cmax(d__2,d__3);
+	    } else {
+/* Computing MAX */
+		d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) 
+			/ (work[i__] + safe1);
+		s = f2cmax(d__2,d__3);
+	    }
+/* L80: */
+	}
+	berr[j] = s;
+
+/*        Test stopping criterion. Continue iterating if */
+/*           1) The residual BERR(J) is larger than machine epsilon, and */
+/*           2) BERR(J) decreased by at least a factor of 2 during the */
+/*              last iteration, and */
+/*           3) At most ITMAX iterations tried. */
+
+	if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* the literal 5 stands in for ITMAX */
+
+/*           Update solution and try again. */
+
+	    dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1]
+		    , &work[*n + 1], n, info); /* status written to *info is not checked */
+	    daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
+		    ;
+	    lstres = berr[j];
+	    ++count;
+	    goto L20;
+	}
+
+/*        Bound error from formula */
+
+/*        norm(X - XTRUE) / norm(X) .le. FERR = */
+/*        norm( abs(inv(op(A)))* */
+/*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */
+
+/*        where */
+/*          norm(Z) is the magnitude of the largest component of Z */
+/*          inv(op(A)) is the inverse of op(A) */
+/*          abs(Z) is the componentwise absolute value of the matrix or */
+/*             vector Z */
+/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
+/*          EPS is machine epsilon */
+
+/*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
+/*        is incremented by SAFE1 if the i-th component of */
+/*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */
+
+/*        Use DLACN2 to estimate the infinity-norm of the matrix */
+/*           inv(op(A)) * diag(W), */
+/*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
+
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
+			work[i__];
+	    } else {
+		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
+			work[i__] + safe1;
+	    }
+/* L90: */
+	}
+
+	kase = 0;
+L100: /* dlacn2_ is re-entered until it hands back kase == 0 */
+	dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
+		kase, isave);
+	if (kase != 0) {
+	    if (kase == 1) {
+
+/*              Multiply by diag(W)*inv(op(A)**T). */
+
+		dgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &
+			ipiv[1], &work[*n + 1], n, info);
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] *= work[i__];
+/* L110: */
+		}
+	    } else {
+
+/*              Multiply by inv(op(A))*diag(W). */
+
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] *= work[i__];
+/* L120: */
+		}
+		dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &
+			ipiv[1], &work[*n + 1], n, info);
+	    }
+	    goto L100;
+	}
+
+/*        Normalize error. */
+
+	lstres = 0.; /* lstres is reused here as the running max of abs(X(i,j)) */
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1));
+	    lstres = f2cmax(d__2,d__3);
+/* L130: */
+	}
+	if (lstres != 0.) { /* an all-zero column leaves FERR(j) unscaled */
+	    ferr[j] /= lstres;
+	}
+
+/* L140: */
+    }
+
+    return 0;
+
+/*     End of DGBRFS */
+
+} /* dgbrfs_ */
+
diff --git a/lapack-netlib/SRC/dgbrfsx.c b/lapack-netlib/SRC/dgbrfsx.c
new file mode 100644
index 000000000..b6ff69087
--- /dev/null
+++ b/lapack-netlib/SRC/dgbrfsx.c
@@ -0,0 +1,1181 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* INTEGER arguments are plain C int in this build */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex; /* takes over the name freed by the #undef complex above */
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* {r,i} struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* double-precision counterpart of Cf */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* views the struct storage as a C99 complex; assumes the layouts match */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* double-precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z)) /* assignable lvalue over the struct's storage */
+#define pCd(z) (*_pCd(z)) /* assignable lvalue over the struct's storage */
+typedef int logical; /* truth values are TRUE_ / FALSE_ below */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen; /* type of the length argument, e.g. the (ftnlen)6 cast passed to xerbla_ in dgbrfs_ */
+typedef int ftnint;
+
+/* cilist: parameters for external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: parameters for internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: parameters for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: parameters for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: parameters for rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: parameters for inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen; /* NOTE(review): the only *len field typed ftnint rather than ftnlen; both are int here */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: one entry per variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist { /* a named group of nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* type-generic; evaluates x twice and replaces <stdlib.h> abs() */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* evaluates the selected argument twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* evaluates the selected argument twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): calls double-precision ccos, unlike the *f calls in c_exp/c_log/c_sin */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* uses double-precision conj; result is narrowed on the store */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* rounds halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| carrying the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars, stops at a NUL in B, adds no padding */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a brace block, so it cannot be used as an expression */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): expands to a brace block, so it cannot be used as an expression */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): brace block with no terminating ';' inside; result of dmaxloc_ is not exposed */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...); /* pointer to a logical-valued function; parameter list left open */
+#else
+typedef logical (*L_fp)(); /* unprototyped in C: accepts any argument list */
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x**n for integer n by repeated squaring */
+	float pow=1.0; unsigned long int u; /* local pow hides pow() from <math.h> inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the reciprocal base with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break;
+		}
+	}
+	return pow; /* n == 0 yields 1 */
+}
+static double dpow_ui(double x, integer n) { /* returns x**n for integer n by repeated squaring */
+	double pow=1.0; unsigned long int u; /* local pow hides pow() from <math.h> inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the reciprocal base with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break;
+		}
+	}
+	return pow; /* n == 0 yields 1 */
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* complex x**n for integer n by repeated squaring */
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the reciprocal base with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break;
+		}
+	}
+	return pow; /* n == 0 yields 1 */
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* double-complex x**n for integer n by repeated squaring */
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the reciprocal base with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break;
+		}
+	}
+	return pow; /* n == 0 yields 1 */
+}
+static integer pow_ii(integer x, integer n) { /* integer x**n; non-positive exponents are settled first */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* x == 0 evaluates 1/x (integer division by zero); any other base gives 0 */
+		else n = -n; /* x == -1: continue with |n| in the squaring loop below */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* not entered when pow was already assigned above */
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the first maximum among w[s-1..e-1] */
+{
+	double m; integer i, mi; /* parameter n is never read */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the earliest of equal maxima */
+	return mi-s+1; /* 1-based offset relative to s */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* position of the first maximum among w[s-1..e-1] */
+{
+	float m; integer i, mi; /* parameter n is never read */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the earliest of equal maxima */
+	return mi-s+1; /* 1-based offset relative to s */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* stores sum(conj(x(i))*y(i)) into *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i))* y(i), unit stride */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same sum with strides; a negative increment would index before x[0]/y[0] */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* stores sum(conj(x(i))*y(i)) into *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i), unit stride */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same sum with strides; a negative increment would index before x[0]/y[0] */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* stores the unconjugated sum(x(i)*y(i)) into *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator keeps the zdotc name although nothing is conjugated here */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i), unit stride */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same sum with strides; a negative increment would index before x[0]/y[0] */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* stores the unconjugated sum(x(i)*y(i)) into *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator keeps the zdotc name although nothing is conjugated here */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i)* y(i), unit stride */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same sum with strides; a negative increment would index before x[0]/y[0] */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1; /* addressable -1; presumably passed by pointer to callees, use is outside this view */
+static integer c__0 = 0; /* addressable 0; use is outside this view */
+static integer c__1 = 1; /* addressable 1; use is outside this view */
+
+/* > \brief \b DGBRFSX */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBRFSX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbrfsx.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbrfsx.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbrfsx.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, */
+/*                           LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, */
+/*                           BERR, N_ERR_BNDS, ERR_BNDS_NORM, */
+/*                           ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, */
+/*                           INFO ) */
+
+/*       CHARACTER          TRANS, EQUED */
+/*       INTEGER            INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, */
+/*      $                   NPARAMS, N_ERR_BNDS */
+/*       DOUBLE PRECISION   RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */
+/*      $                   X( LDX , * ),WORK( * ) */
+/*       DOUBLE PRECISION   R( * ), C( * ), PARAMS( * ), BERR( * ), */
+/*      $                   ERR_BNDS_NORM( NRHS, * ), */
+/*      $                   ERR_BNDS_COMP( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    DGBRFSX improves the computed solution to a system of linear */
+/* >    equations and provides error bounds and backward error estimates */
+/* >    for the solution.  In addition to normwise error bound, the code */
+/* >    provides maximum componentwise error bound if possible.  See */
+/* >    comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */
+/* >    error bounds. */
+/* > */
+/* >    The original system of linear equations may have been equilibrated */
+/* >    before calling this routine, as described by arguments EQUED, R */
+/* >    and C below. In this case, the solution and error bounds returned */
+/* >    are for the original unequilibrated system. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \verbatim */
+/* >     Some optional parameters are bundled in the PARAMS array.  These */
+/* >     settings determine how refinement is performed, but often the */
+/* >     defaults are acceptable.  If the defaults are acceptable, users */
+/* >     can pass NPARAMS = 0 which prevents the source code from accessing */
+/* >     the PARAMS argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >     Specifies the form of equilibration that was done to A */
+/* >     before calling this routine. This is needed to compute */
+/* >     the solution and error bounds correctly. */
+/* >       = 'N':  No equilibration */
+/* >       = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >               diag(R). */
+/* >       = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >               by diag(C). */
+/* >       = 'B':  Both row and column equilibration, i.e., A has been */
+/* >               replaced by diag(R) * A * diag(C). */
+/* >               The right hand side B has been changed accordingly. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >     The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >     The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right hand sides, i.e., the number of columns */
+/* >     of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >     The original band matrix A, stored in rows 1 to KL+KU+1. */
+/* >     The j-th column of A is stored in the j-th column of the */
+/* >     array AB as follows: */
+/* >     AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >     The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AFB */
+/* > \verbatim */
+/* >          AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */
+/* >     Details of the LU factorization of the band matrix A, as */
+/* >     computed by DGBTRF.  U is stored as an upper triangular band */
+/* >     matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
+/* >     the multipliers used during the factorization are stored in */
+/* >     rows KL+KU+2 to 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAFB */
+/* > \verbatim */
+/* >          LDAFB is INTEGER */
+/* >     The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     The pivot indices from DGBTRF; for 1<=i<=N, row i of the */
+/* >     matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (N) */
+/* >     The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >     multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >     is not accessed.  R is an input argument if FACT = 'F'; */
+/* >     otherwise, R is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'R' or 'B', each element of R must be positive. */
+/* >     If R is output, each element of R is a power of the radix. */
+/* >     If R is input, each element of R should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >     The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >     multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >     is not accessed.  C is an input argument if FACT = 'F'; */
+/* >     otherwise, C is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'C' or 'B', each element of C must be positive. */
+/* >     If C is output, each element of C is a power of the radix. */
+/* >     If C is input, each element of C should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >     The right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >     On entry, the solution matrix X, as computed by DGBTRS. */
+/* >     On exit, the improved solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >     The leading dimension of the array X.  LDX >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >     Componentwise relative backward error.  This is the */
+/* >     componentwise relative backward error of each solution vector X(j) */
+/* >     (i.e., the smallest relative change in any element of A or B that */
+/* >     makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_ERR_BNDS */
+/* > \verbatim */
+/* >          N_ERR_BNDS is INTEGER */
+/* >     Number of error bounds to return for each right hand side */
+/* >     and each type (normwise or componentwise).  See ERR_BNDS_NORM and */
+/* >     ERR_BNDS_COMP below. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_NORM */
+/* > \verbatim */
+/* >          ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is not less than the */
+/* >              threshold sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_COMP */
+/* > \verbatim */
+/* >          ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is not less than the */
+/* >              threshold sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NPARAMS */
+/* > \verbatim */
+/* >          NPARAMS is INTEGER */
+/* >     Specifies the number of parameters set in PARAMS.  If <= 0, the */
+/* >     PARAMS array is never referenced and default values are used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] PARAMS */
+/* > \verbatim */
+/* >          PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */
+/* >     Specifies algorithm parameters.  If an entry is < 0.0, then */
+/* >     that entry will be filled with default value used for that */
+/* >     parameter.  Only positions up to NPARAMS are accessed; defaults */
+/* >     are used for higher-numbered parameters. */
+/* > */
+/* >       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */
+/* >            refinement or not. */
+/* >         Default: 1.0D+0 */
+/* >            = 0.0:  No refinement is performed, and no error bounds are */
+/* >                    computed. */
+/* >            = 1.0:  Use the double-precision refinement algorithm, */
+/* >                    possibly with doubled-single computations if the */
+/* >                    compilation environment does not support DOUBLE */
+/* >                    PRECISION. */
+/* >              (other values are reserved for future use) */
+/* > */
+/* >       PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */
+/* >            computations allowed for refinement. */
+/* >         Default: 10 */
+/* >         Aggressive: Set to 100 to permit convergence using approximate */
+/* >                     factorizations or factorizations other than LU. If */
+/* >                     the factorization uses a technique other than */
+/* >                     Gaussian elimination, the guarantees in */
+/* >                     err_bnds_norm and err_bnds_comp may no longer be */
+/* >                     trustworthy. */
+/* > */
+/* >       PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */
+/* >            will attempt to find a solution with small componentwise */
+/* >            relative error in the double-precision algorithm.  Positive */
+/* >            is true, 0.0 is false. */
+/* >         Default: 1.0 (attempt componentwise convergence) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. The solution to every right-hand side is */
+/* >         guaranteed. */
+/* >       < 0:  If INFO = -i, the i-th argument had an illegal value */
+/* >       > 0 and <= N:  U(INFO,INFO) is exactly zero.  The factorization */
+/* >         has been completed, but the factor U is exactly singular, so */
+/* >         the solution and error bounds could not be computed. RCOND = 0 */
+/* >         is returned. */
+/* >       = N+J: The solution corresponding to the Jth right-hand side is */
+/* >         not guaranteed. The solutions corresponding to other right- */
+/* >         hand sides K with K > J may not be guaranteed as well, but */
+/* >         only the first such right-hand side is reported. If a small */
+/* >         componentwise error is not requested (PARAMS(3) = 0.0) then */
+/* >         the Jth right-hand side is the first with a normwise error */
+/* >         bound that is not guaranteed (the smallest J such */
+/* >         that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */
+/* >         the Jth right-hand side is the first with either a normwise or */
+/* >         componentwise error bound that is not guaranteed (the smallest */
+/* >         J such that either ERR_BNDS_NORM(J,1) = 0.0 or */
+/* >         ERR_BNDS_COMP(J,1) = 0.0). See the definition of */
+/* >         ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */
+/* >         about all of the right-hand sides check ERR_BNDS_NORM or */
+/* >         ERR_BNDS_COMP. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup doubleGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbrfsx_(char *trans, char *equed, integer *n, integer *
+	kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, 
+	doublereal *afb, integer *ldafb, integer *ipiv, doublereal *r__, 
+	doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *
+	ldx, doublereal *rcond, doublereal *berr, integer *n_err_bnds__, 
+	doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *
+	nparams, doublereal *params, doublereal *work, integer *iwork, 
+	integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
+	    x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, 
+	    err_bnds_comp_dim1, err_bnds_comp_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__;
+    char norm[1];
+    integer ref_type__;
+    extern integer ilatrans_(char *);
+    logical ignore_cwise__;
+    integer j;
+    extern logical lsame_(char *, char *);
+    doublereal anorm, rcond_tmp__;
+    integer prec_type__;
+    extern doublereal dlamch_(char *), dlangb_(char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgbcon_(char *, integer *, integer *, integer 
+	    *, doublereal *, integer *, integer *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *), xerbla_(char *, 
+	    integer *, ftnlen);
+    logical colequ, notran, rowequ;
+    integer trans_type__;
+    extern doublereal dla_gbrcond_(char *, integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *);
+    extern integer ilaprec_(char *);
+    integer ithresh, n_norms__;
+    doublereal rthresh, cwise_wrong__;
+    extern /* Subroutine */ int dla_gbrfsx_extended_(integer *, integer *, 
+	    integer *, integer *, integer *, integer *, doublereal *, integer 
+	    *, doublereal *, integer *, integer *, logical *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, doublereal *, doublereal *, integer *, doublereal *
+	    , doublereal *, logical *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+/*  Flow of the body below: decode PARAMS, validate the arguments, call DGBCON */
+/*  for RCOND, optionally refine X, then threshold the returned error bounds. */
+
+/*  ================================================================== */
+
+
+/*     Check the input parameters. */
+
+    /* Parameter adjustments */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    afb_dim1 = *ldafb;
+    afb_offset = 1 + afb_dim1 * 1;
+    afb -= afb_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --berr;
+    --params;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    trans_type__ = ilatrans_(trans);
+    ref_type__ = 1;
+    if (*nparams >= 1) {
+	if (params[1] < 0.) {
+	    params[1] = 1.;
+	} else {
+	    ref_type__ = (integer) params[1];
+	}
+    }
+
+/*     Set default parameters. */
+
+    illrcond_thresh__ = (doublereal) (*n) * dlamch_("Epsilon");
+    ithresh = 10;
+    rthresh = .5;
+    unstable_thresh__ = .25;
+    ignore_cwise__ = FALSE_;
+
+    if (*nparams >= 2) {
+	if (params[2] < 0.) {
+	    params[2] = (doublereal) ithresh;
+	} else {
+	    ithresh = (integer) params[2];
+	}
+    }
+    if (*nparams >= 3) {
+	if (params[3] < 0.) {
+	    if (ignore_cwise__) {
+		params[3] = 0.;
+	    } else {
+		params[3] = 1.;
+	    }
+	} else {
+	    ignore_cwise__ = params[3] == 0.;
+	}
+    }
+    if (ref_type__ == 0 || *n_err_bnds__ == 0) {
+	n_norms__ = 0;
+    } else if (ignore_cwise__) {
+	n_norms__ = 1;
+    } else {
+	n_norms__ = 2;
+    }
+
+    notran = lsame_(trans, "N");
+    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
+    colequ = lsame_(equed, "C") || lsame_(equed, "B");
+
+/*     Test input parameters. */
+
+    if (trans_type__ == -1) {
+	*info = -1;
+    } else if (! rowequ && ! colequ && ! lsame_(equed, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*kl < 0) {
+	*info = -4;
+    } else if (*ku < 0) {
+	*info = -5;
+    } else if (*nrhs < 0) {
+	*info = -6;
+    } else if (*ldab < *kl + *ku + 1) {
+	*info = -8;
+    } else if (*ldafb < (*kl << 1) + *ku + 1) {
+	*info = -10;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -13;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -15;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGBRFSX", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0 || *nrhs == 0) {
+	*rcond = 1.;
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    berr[j] = 0.;
+	    if (*n_err_bnds__ >= 1) {
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+	    }
+	    if (*n_err_bnds__ >= 2) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.;
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.;
+	    }
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.;
+		err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.;
+	    }
+	}
+	return 0;
+    }
+
+/*     Default to failure. */
+
+    *rcond = 0.;
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	berr[j] = 1.;
+	if (*n_err_bnds__ >= 1) {
+	    err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+	    err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+	}
+	if (*n_err_bnds__ >= 2) {
+	    err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+	    err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+	}
+	if (*n_err_bnds__ >= 3) {
+	    err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.;
+	    err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.;
+	}
+    }
+
+/*     Compute the norm of A and the reciprocal of the condition */
+/*     number of A. */
+
+    if (notran) {
+	*(unsigned char *)norm = 'I';
+    } else {
+	*(unsigned char *)norm = '1';
+    }
+    anorm = dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]);
+    dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond,
+	     &work[1], &iwork[1], info);
+
+/*     Perform refinement on each right-hand side */
+
+    if (ref_type__ != 0 && *info == 0) {
+	prec_type__ = ilaprec_("E");
+	if (notran) {
+	    dla_gbrfsx_extended_(&prec_type__, &trans_type__, n, kl, ku, 
+		    nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &
+		    ipiv[1], &colequ, &c__[1], &b[b_offset], ldb, &x[x_offset]
+		    , ldx, &berr[1], &n_norms__, &err_bnds_norm__[
+		    err_bnds_norm_offset], &err_bnds_comp__[
+		    err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n 
+		    << 1) + 1], &work[1], rcond, &ithresh, &rthresh, &
+		    unstable_thresh__, &ignore_cwise__, info);
+	} else {
+	    dla_gbrfsx_extended_(&prec_type__, &trans_type__, n, kl, ku, 
+		    nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &
+		    ipiv[1], &rowequ, &r__[1], &b[b_offset], ldb, &x[x_offset]
+		    , ldx, &berr[1], &n_norms__, &err_bnds_norm__[
+		    err_bnds_norm_offset], &err_bnds_comp__[
+		    err_bnds_comp_offset], &work[*n + 1], &work[1], &work[(*n 
+		    << 1) + 1], &work[1], rcond, &ithresh, &rthresh, &
+		    unstable_thresh__, &ignore_cwise__, info);
+	}
+    }
+/* Computing MAX */
+    d__1 = 10., d__2 = sqrt((doublereal) (*n));
+    err_lbnd__ = f2cmax(d__1,d__2) * dlamch_("Epsilon");
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 1) {
+
+/*     Compute scaled normwise condition number cond(A*C). */
+
+	if (colequ && notran) {
+	    rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], 
+		    ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &c__[1], 
+		    info, &work[1], &iwork[1]);
+	} else if (rowequ && ! notran) {
+	    rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], 
+		    ldab, &afb[afb_offset], ldafb, &ipiv[1], &c_n1, &r__[1], 
+		    info, &work[1], &iwork[1]);
+	} else {
+	    rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], 
+		    ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__0, &r__[1], 
+		    info, &work[1], &iwork[1]);
+	}
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+
+/*     Cap the error at 1.0. */
+
+	    if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 
+		    << 1)] > 1.) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+	    }
+
+/*     Threshold the error (see LAWN). */
+/*     NOTE(review): column 2 is touched below even if N_ERR_BNDS == 1 - confirm. */
+	    if (rcond_tmp__ < illrcond_thresh__) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 0.;
+		if (*info <= *n) {
+		    *info = *n + j;
+		}
+	    } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < 
+		    err_lbnd__) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__;
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+	    }
+
+/*     Save the condition number. */
+
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__;
+	    }
+	}
+    }
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 2) {
+
+/*     Compute componentwise condition number cond(A*diag(Y(:,J))) for */
+/*     each right-hand side using the current solution as an estimate of */
+/*     the true solution.  If the componentwise error estimate is too */
+/*     large, then the solution is a lousy estimate of truth and the */
+/*     estimated RCOND may be too optimistic.  To avoid misleading users, */
+/*     the inverse condition number is set to 0.0 when the estimated */
+/*     cwise error is at least CWISE_WRONG. */
+
+	cwise_wrong__ = sqrt(dlamch_("Epsilon"));
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < 
+		    cwise_wrong__) {
+		rcond_tmp__ = dla_gbrcond_(trans, n, kl, ku, &ab[ab_offset], 
+			ldab, &afb[afb_offset], ldafb, &ipiv[1], &c__1, &x[j *
+			 x_dim1 + 1], info, &work[1], &iwork[1]);
+	    } else {
+		rcond_tmp__ = 0.;
+	    }
+
+/*     Cap the error at 1.0. */
+
+	    if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 
+		    << 1)] > 1.) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+	    }
+
+/*     Threshold the error (see LAWN). */
+/*     PARAMS(3) is read only if NPARAMS >= 3; otherwise its default 1.0 applies. */
+	    if (rcond_tmp__ < illrcond_thresh__) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 0.;
+		if ((*nparams < 3 || params[3] == 1.) && *info < *n + j) {
+		    *info = *n + j;
+		}
+	    } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < 
+		    err_lbnd__) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+	    }
+
+/*     Save the condition number. */
+
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DGBRFSX */
+
+} /* dgbrfsx_ */
+
diff --git a/lapack-netlib/SRC/dgbsv.c b/lapack-netlib/SRC/dgbsv.c
new file mode 100644
index 000000000..5668af30f
--- /dev/null
+++ b/lapack-netlib/SRC/dgbsv.c
@@ -0,0 +1,622 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+/* <complex.h> is included for the C99 complex functions; its `complex` and `I` macros are removed right after. */
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+/* Fortran scalar types as C types.  Cf/Cd return an {r,i} struct as a C99 complex value; pCf/pCd alias the struct through a pointer cast. */
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+/* flag, ftnlen and ftnint are all plain int in this copy of the header. */
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Macro forms of f2c support routines (complex math, sign/nint, powers, strings); several evaluate an argument more than once. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): calls the double-precision ccos, unlike its cexpf/clogf/csinf neighbours */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars, stops at a NUL in B, adds no padding */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a braced block, not an expression - confirm intended use */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): same braced form as myceiling */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): same braced form; the dmaxloc_ result is discarded */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by binary exponentiation; for n < 0 the base becomes 1/x. */
+static float spow_ui(float x, integer n) {
+	float pow=1.0; unsigned long int u = (unsigned long int)n;
+	/* |n| is formed in unsigned arithmetic: negating INT_MIN as a signed int is undefined. */
+	if(n < 0) u = 0UL - u, x = 1/x;
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+/* x**n by binary exponentiation; for n < 0 the base becomes 1/x. */
+static double dpow_ui(double x, integer n) {
+	double pow=1.0; unsigned long int u = (unsigned long int)n;
+	/* |n| is formed in unsigned arithmetic: negating INT_MIN as a signed int is undefined. */
+	if(n < 0) u = 0UL - u, x = 1/x;
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+/* x**n for a single-precision complex base by binary exponentiation; for n < 0 the base becomes 1/x. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow=1.0; unsigned long int u = (unsigned long int)n;
+	/* |n| is formed in unsigned arithmetic: negating INT_MIN as a signed int is undefined. */
+	if(n < 0) u = 0UL - u, x = 1/x;
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+/* x**n for a double-precision complex base by binary exponentiation; for n < 0 the base becomes 1/x. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow=1.0; unsigned long int u = (unsigned long int)n;
+	/* |n| is formed in unsigned arithmetic: negating INT_MIN as a signed int is undefined. */
+	if(n < 0) u = 0UL - u, x = 1/x;
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+/* Integer power x**n.  For n <= 0: 1 if n == 0 or x == 1; (-1)**n by parity; */
+/* otherwise 0, except that x == 0 still evaluates 1/x (integer division by */
+/* zero), kept from the original.  |n| is formed unsigned so INT_MIN is safe. */
+static integer pow_ii(integer x, integer n) {
+	integer pow = 1; unsigned long int u = (unsigned long int)n;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		if (x != -1) return x == 0 ? 1/x : 0;
+		u = 0UL - u;
+	}
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k; double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k; float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+/* z = sum over i of conjg(x(i)) * y(i), n terms, single-precision complex. */
+/* A negative increment starts at element (1-n)*inc and steps backwards, as in */
+/* reference BLAS, rather than indexing before the start of the array. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+/* z = sum over i of conjg(x(i)) * y(i), n terms, double-precision complex. */
+/* A negative increment starts at element (1-n)*inc and steps backwards, as in */
+/* reference BLAS, rather than indexing before the start of the array. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+/* z = sum over i of x(i) * y(i) (no conjugation), n terms, single-precision complex. */
+/* A negative increment starts at element (1-n)*inc and steps backwards, as in */
+/* reference BLAS, rather than indexing before the start of the array. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+/* z = sum over i of x(i) * y(i) (no conjugation), n terms, double-precision complex. */
+/* A negative increment starts at element (1-n)*inc and steps backwards, as in */
+/* reference BLAS, rather than indexing before the start of the array. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> DGBSV computes the solution to system of linear equations A * X = B for GB matrices</b> (simpl
+e driver) */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBSV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbsv.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbsv.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbsv.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) */
+
+/*       INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBSV computes the solution to a real system of linear equations */
+/* > A * X = B, where A is a band matrix of order N with KL subdiagonals */
+/* > and KU superdiagonals, and X and B are N-by-NRHS matrices. */
+/* > */
+/* > The LU decomposition with partial pivoting and row interchanges is */
+/* > used to factor A as A = L * U, where L is a product of permutation */
+/* > and unit lower triangular matrices with KL subdiagonals, and U is */
+/* > upper triangular with KL+KU superdiagonals.  The factored form of A */
+/* > is then used to solve the system of equations A * X = B. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of linear equations, i.e., the order of the */
+/* >          matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          On entry, the matrix A in band storage, in rows KL+1 to */
+/* >          2*KL+KU+1; rows 1 to KL of the array need not be set. */
+/* >          The j-th column of A is stored in the j-th column of the */
+/* >          array AB as follows: */
+/* >          AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) */
+/* >          On exit, details of the factorization: U is stored as an */
+/* >          upper triangular band matrix with KL+KU superdiagonals in */
+/* >          rows 1 to KL+KU+1, and the multipliers used during the */
+/* >          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices that define the permutation matrix P; */
+/* >          row i of the matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the N-by-NRHS right hand side matrix B. */
+/* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, and the solution has not been computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBsolve */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The band storage scheme is illustrated by the following example, when */
+/* >  M = N = 6, KL = 2, KU = 1: */
+/* > */
+/* >  On entry:                       On exit: */
+/* > */
+/* >      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
+/* >      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
+/* >      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
+/* >     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
+/* >     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
+/* >     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
+/* > */
+/* >  Array elements marked * are not used by the routine; elements marked */
+/* >  + need not be set on entry, but are required by the routine to store */
+/* >  elements of U because of fill-in resulting from the row interchanges. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgbsv_(integer *n, integer *kl, integer *ku,
+	integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv,
+	doublereal *b, integer *ldb, integer *info)
+{
+    /* External routines this driver calls. */
+    extern /* Subroutine */ int dgbtrf_(integer *, integer *, integer *,
+	    integer *, doublereal *, integer *, integer *, integer *);
+    extern /* Subroutine */ int dgbtrs_(char *, integer *, integer *,
+	    integer *, integer *, doublereal *, integer *, integer *,
+	    doublereal *, integer *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+    /* 1-based position of the first argument that fails validation, */
+    /* or 0 when every check passes. */
+    integer bad_arg;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Check the arguments in declaration order; the first violation found */
+/*     determines the value reported through INFO. */
+
+    if (*n < 0) {
+	bad_arg = 1;
+    } else if (*kl < 0) {
+	bad_arg = 2;
+    } else if (*ku < 0) {
+	bad_arg = 3;
+    } else if (*nrhs < 0) {
+	bad_arg = 4;
+    } else if (*ldab < (*kl << 1) + *ku + 1) {
+	/* LDAB must be at least 2*KL+KU+1. */
+	bad_arg = 6;
+    } else if (*ldb < f2cmax(*n,1)) {
+	bad_arg = 9;
+    } else {
+	bad_arg = 0;
+    }
+
+    *info = -bad_arg;
+    if (bad_arg != 0) {
+
+/*        Report the position of the offending argument and return. */
+
+	xerbla_("DGBSV ", &bad_arg, (ftnlen)6);
+	return 0;
+    }
+
+/*     Compute the LU factorization of the band matrix A.  N is supplied */
+/*     for both of the first two arguments of the factorization routine. */
+
+    dgbtrf_(n, n, kl, ku, ab, ldab, ipiv, info);
+    if (*info != 0) {
+
+/*        The factorization reported a nonzero INFO; skip the solve. */
+
+	return 0;
+    }
+
+/*     Solve the system A*X = B, overwriting B with X. */
+
+    dgbtrs_("No transpose", n, kl, ku, nrhs, ab, ldab, ipiv, b, ldb, info);
+    return 0;
+} /* dgbsv_ */
+
diff --git a/lapack-netlib/SRC/dgbsvx.c b/lapack-netlib/SRC/dgbsvx.c
new file mode 100644
index 000000000..370e0670a
--- /dev/null
+++ b/lapack-netlib/SRC/dgbsvx.c
@@ -0,0 +1,1138 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* type of the INTEGER arguments of the translated routines */
+typedef unsigned int uinteger;	/* unsigned counterpart; the bit_clear/bit_set macros shift a (uinteger)1 */
+typedef char *address;
+typedef short int shortint;
+typedef float real;	/* single-precision real */
+typedef double doublereal;	/* double-precision real (DOUBLE PRECISION arguments) */
+typedef struct { real r, i; } complex;	/* single-precision complex: r = real part, i = imaginary part */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex, same member order */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* struct pointer recast as a C99 complex pointer */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* struct pointer recast as a C99 complex pointer */
+#define pCf(z) (*_pCf(z))	/* lvalue over the struct behind z; used as an assignment target */
+#define pCd(z) (*_pCd(z))	/* double-precision form of pCf */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain extern unless defined beforehand */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: scalar types and one argument block per group of I/O statements named in the comments below */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the *len members that accompany the char* members below */
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* declared ftnint, unlike the other *len members (both are typedefs of int) */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points; one member per scalar type declared above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named list of Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;	/* presumably the number of entries in vars -- TODO confirm */
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* macro abs for any arithmetic operand; x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* smaller operand; the selected one is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* larger operand; the selected one is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Helper macros below take pointer arguments; the brace-wrapped complex ones store through their first argument. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): calls the double ccos, whereas c_exp/c_log/c_sin call the float variants */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf(R); an earlier form of the macro assigned to *(R) directly. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* uses the double conj; the result is converted back on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference, else 0.0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* yields a double, since cimag is the double-precision accessor */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncation toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncation toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* base-10 logarithm: the constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* base-10 logarithm: the constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves rounded away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| carrying the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* the length is simply the second argument */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))	/* double base, double exponent, both passed by pointer */
+#define pow_si(B,E) spow_ui(*(B),*(E))	/* float base, integer exponent */
+#define pow_ri(B,E) spow_ui(*(B),*(E))	/* float base, integer exponent */
+#define pow_di(B,E) dpow_ui(*(B),*(E))	/* double base, integer exponent */
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}	/* double complex base, integer exponent; stores through p */
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}	/* float complex base, integer exponent; stores through p */
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* complex exponent: B is converted with Cd() like A, since cpow cannot take the struct *(B) */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* The three my* value macros are parenthesised expressions: a braced {expr} form has no terminating ';' and cannot compile where it is expanded. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+/* L_fp: pointer to a function returning logical; neither branch fixes the parameter list. */
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++: variadic parameter list */
+#else
+typedef logical (*L_fp)();	/* C: unprototyped parameter list */
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* x raised to the integer power n by repeated squaring over the bits of |n|. */
+	float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: work with the reciprocal */
+	for (bits = n; bits != 0; ) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;
+	}
+	return acc;
+}
+static double dpow_ui(double x, integer n) {
+	/* x raised to the integer power n by repeated squaring over the bits of |n|. */
+	double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: work with the reciprocal */
+	for (bits = n; bits != 0; ) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;
+	}
+	return acc;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Complex x raised to the integer power n by repeated squaring over the bits of |n|. */
+	_Complex float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: work with the reciprocal */
+	for (bits = n; bits != 0; ) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;
+	}
+	return acc;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Complex x raised to the integer power n by repeated squaring over the bits of |n|. */
+	_Complex double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: work with the reciprocal */
+	for (bits = n; bits != 0; ) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; every non-positive exponent is settled before the multiply loop, except for x == -1. */
+	integer acc = 1; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* Here n < 0: |x| > 1 yields 0, while x == 0 evaluates 1/x, an integer division by zero. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* x == -1: continue with the positive exponent. */
+		n = -n;
+	}
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at w(s), of the first largest entry among the 1-based w(s)..w(e); n is not read. */
+	double best = w[s-1]; integer k, where = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > best) { where = k; best = w[k-1]; }
+	return where - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at w(s), of the first largest entry among the 1-based w(s)..w(e); n is not read. */
+	float best = w[s-1]; integer k, where = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > best) { where = k; best = w[k-1]; }
+	return where - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n elements spaced incx and incy apart. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS rule: a negative stride starts at offset (1-n)*inc and walks back towards element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n elements spaced incx and incy apart. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS rule: a negative stride starts at offset (1-n)*inc and walks back towards element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements spaced incx and incy apart. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS rule: a negative stride starts at offset (1-n)*inc and walks back towards element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements spaced incx and incy apart. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS rule: a negative stride starts at offset (1-n)*inc and walks back towards element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief <b> DGBSVX computes the solution to system of linear equations A * X = B for GB matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBSVX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbsvx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbsvx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbsvx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, */
+/*                          LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, */
+/*                          RCOND, FERR, BERR, WORK, IWORK, INFO ) */
+
+/*       CHARACTER          EQUED, FACT, TRANS */
+/*       INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS */
+/*       DOUBLE PRECISION   RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */
+/*      $                   BERR( * ), C( * ), FERR( * ), R( * ), */
+/*      $                   WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBSVX uses the LU factorization to compute the solution to a real */
+/* > system of linear equations A * X = B, A**T * X = B, or A**H * X = B, */
+/* > where A is a band matrix of order N with KL subdiagonals and KU */
+/* > superdiagonals, and X and B are N-by-NRHS matrices. */
+/* > */
+/* > Error bounds on the solution and a condition estimate are also */
+/* > provided. */
+/* > \endverbatim */
+
+/* > \par Description: */
+/*  ================= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The following steps are performed by this subroutine: */
+/* > */
+/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */
+/* >    the system: */
+/* >       TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B */
+/* >       TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */
+/* >       TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */
+/* >    Whether or not the system will be equilibrated depends on the */
+/* >    scaling of the matrix A, but if equilibration is used, A is */
+/* >    overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */
+/* >    or diag(C)*B (if TRANS = 'T' or 'C'). */
+/* > */
+/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */
+/* >    matrix A (after equilibration if FACT = 'E') as */
+/* >       A = L * U, */
+/* >    where L is a product of permutation and unit lower triangular */
+/* >    matrices with KL subdiagonals, and U is upper triangular with */
+/* >    KL+KU superdiagonals. */
+/* > */
+/* > 3. If some U(i,i)=0, so that U is exactly singular, then the routine */
+/* >    returns with INFO = i. Otherwise, the factored form of A is used */
+/* >    to estimate the condition number of the matrix A.  If the */
+/* >    reciprocal of the condition number is less than machine precision, */
+/* >    INFO = N+1 is returned as a warning, but the routine still goes on */
+/* >    to solve for X and compute error bounds as described below. */
+/* > */
+/* > 4. The system of equations is solved for X using the factored form */
+/* >    of A. */
+/* > */
+/* > 5. Iterative refinement is applied to improve the computed solution */
+/* >    matrix and calculate error bounds and backward error estimates */
+/* >    for it. */
+/* > */
+/* > 6. If equilibration was used, the matrix X is premultiplied by */
+/* >    diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */
+/* >    that it solves the original system before equilibration. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] FACT */
+/* > \verbatim */
+/* >          FACT is CHARACTER*1 */
+/* >          Specifies whether or not the factored form of the matrix A is */
+/* >          supplied on entry, and if not, whether the matrix A should be */
+/* >          equilibrated before it is factored. */
+/* >          = 'F':  On entry, AFB and IPIV contain the factored form of */
+/* >                  A.  If EQUED is not 'N', the matrix A has been */
+/* >                  equilibrated with scaling factors given by R and C. */
+/* >                  AB, AFB, and IPIV are not modified. */
+/* >          = 'N':  The matrix A will be copied to AFB and factored. */
+/* >          = 'E':  The matrix A will be equilibrated if necessary, then */
+/* >                  copied to AFB and factored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations. */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of linear equations, i.e., the order of the */
+/* >          matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
+/* >          The j-th column of A is stored in the j-th column of the */
+/* >          array AB as follows: */
+/* >          AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) */
+/* > */
+/* >          If FACT = 'F' and EQUED is not 'N', then A must have been */
+/* >          equilibrated by the scaling factors in R and/or C.  AB is not */
+/* >          modified if FACT = 'F' or 'N', or if FACT = 'E' and */
+/* >          EQUED = 'N' on exit. */
+/* > */
+/* >          On exit, if EQUED .ne. 'N', A is scaled as follows: */
+/* >          EQUED = 'R':  A := diag(R) * A */
+/* >          EQUED = 'C':  A := A * diag(C) */
+/* >          EQUED = 'B':  A := diag(R) * A * diag(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AFB */
+/* > \verbatim */
+/* >          AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */
+/* >          If FACT = 'F', then AFB is an input argument and on entry */
+/* >          contains details of the LU factorization of the band matrix */
+/* >          A, as computed by DGBTRF.  U is stored as an upper triangular */
+/* >          band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */
+/* >          and the multipliers used during the factorization are stored */
+/* >          in rows KL+KU+2 to 2*KL+KU+1.  If EQUED .ne. 'N', then AFB is */
+/* >          the factored form of the equilibrated matrix A. */
+/* > */
+/* >          If FACT = 'N', then AFB is an output argument and on exit */
+/* >          returns details of the LU factorization of A. */
+/* > */
+/* >          If FACT = 'E', then AFB is an output argument and on exit */
+/* >          returns details of the LU factorization of the equilibrated */
+/* >          matrix A (see the description of AB for the form of the */
+/* >          equilibrated matrix). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAFB */
+/* > \verbatim */
+/* >          LDAFB is INTEGER */
+/* >          The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          If FACT = 'F', then IPIV is an input argument and on entry */
+/* >          contains the pivot indices from the factorization A = L*U */
+/* >          as computed by DGBTRF; row i of the matrix was interchanged */
+/* >          with row IPIV(i). */
+/* > */
+/* >          If FACT = 'N', then IPIV is an output argument and on exit */
+/* >          contains the pivot indices from the factorization A = L*U */
+/* >          of the original matrix A. */
+/* > */
+/* >          If FACT = 'E', then IPIV is an output argument and on exit */
+/* >          contains the pivot indices from the factorization A = L*U */
+/* >          of the equilibrated matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >          Specifies the form of equilibration that was done. */
+/* >          = 'N':  No equilibration (always true if FACT = 'N'). */
+/* >          = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >                  diag(R). */
+/* >          = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >                  by diag(C). */
+/* >          = 'B':  Both row and column equilibration, i.e., A has been */
+/* >                  replaced by diag(R) * A * diag(C). */
+/* >          EQUED is an input argument if FACT = 'F'; otherwise, it is an */
+/* >          output argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (N) */
+/* >          The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >          is not accessed.  R is an input argument if FACT = 'F'; */
+/* >          otherwise, R is an output argument.  If FACT = 'F' and */
+/* >          EQUED = 'R' or 'B', each element of R must be positive. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >          The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >          is not accessed.  C is an input argument if FACT = 'F'; */
+/* >          otherwise, C is an output argument.  If FACT = 'F' and */
+/* >          EQUED = 'C' or 'B', each element of C must be positive. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the right hand side matrix B. */
+/* >          On exit, */
+/* >          if EQUED = 'N', B is not modified; */
+/* >          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */
+/* >          diag(R)*B; */
+/* >          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */
+/* >          overwritten by diag(C)*B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */
+/* >          to the original system of equations.  Note that A and B are */
+/* >          modified on exit if EQUED .ne. 'N', and the solution to the */
+/* >          equilibrated system is inv(diag(C))*X if TRANS = 'N' and */
+/* >          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */
+/* >          and EQUED = 'R' or 'B'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >          The estimate of the reciprocal condition number of the matrix */
+/* >          A after equilibration (if done).  If RCOND is less than the */
+/* >          machine precision (in particular, if RCOND = 0), the matrix */
+/* >          is singular to working precision.  This condition is */
+/* >          indicated by a return code of INFO > 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (3*N) */
+/* >          On exit, WORK(1) contains the reciprocal pivot growth */
+/* >          factor norm(A)/norm(U). The "max absolute element" norm is */
+/* >          used. If WORK(1) is much less than 1, then the stability */
+/* >          of the LU factorization of the (equilibrated) matrix A */
+/* >          could be poor. This also means that the solution X, condition */
+/* >          estimator RCOND, and forward error bound FERR could be */
+/* >          unreliable. If factorization fails with 0<INFO<=N, then */
+/* >          WORK(1) contains the reciprocal pivot growth factor for the */
+/* >          leading INFO columns of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, and i is */
+/* >                <= N:  U(i,i) is exactly zero.  The factorization */
+/* >                       has been completed, but the factor U is exactly */
+/* >                       singular, so the solution and error bounds */
+/* >                       could not be computed. RCOND = 0 is returned. */
+/* >                = N+1: U is nonsingular, but RCOND is less than machine */
+/* >                       precision, meaning that the matrix is singular */
+/* >                       to working precision.  Nevertheless, the */
+/* >                       solution and error bounds are computed because */
+/* >                       there are a number of situations where the */
+/* >                       computed solution can be more accurate than the */
+/* >                       value of RCOND would suggest. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup doubleGBsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbsvx_(char *fact, char *trans, integer *n, integer *kl,
+	 integer *ku, integer *nrhs, doublereal *ab, integer *ldab, 
+	doublereal *afb, integer *ldafb, integer *ipiv, char *equed, 
+	doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, 
+	doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, 
+	doublereal *berr, doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
+	    x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1, d__2, d__3;
+
+    /* Local variables */
+    doublereal amax;
+    char norm[1];
+    integer i__, j;
+    extern logical lsame_(char *, char *);
+    doublereal rcmin, rcmax, anorm;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    logical equil;
+    integer j1, j2;
+    extern doublereal dlamch_(char *), dlangb_(char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dlaqgb_(integer *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *, char *), 
+	    dgbcon_(char *, integer *, integer *, integer *, doublereal *, 
+	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
+	    integer *, integer *);
+    doublereal colcnd;
+    extern doublereal dlantb_(char *, char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgbequ_(integer *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *, integer *), dgbrfs_(
+	    char *, integer *, integer *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *, integer *), dgbtrf_(integer *, 
+	    integer *, integer *, integer *, doublereal *, integer *, integer 
+	    *, integer *);
+    logical nofact;
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    doublereal bignum;
+    extern /* Subroutine */ int dgbtrs_(char *, integer *, integer *, integer 
+	    *, integer *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    integer infequ;
+    logical colequ;
+    doublereal rowcnd;
+    logical notran;
+    doublereal smlnum;
+    logical rowequ;
+    doublereal rpvgrw;
+/*  Purpose: solve A*X = B or A**T*X = B for a band matrix A via LU factorization, with */
+/*  optional equilibration, a condition estimate (RCOND) and error bounds (FERR, BERR). */
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ===================================================================== */
+/*     The adjustments below shift every array pointer so that the body can */
+/*     index with 1-based, column-major subscripts, e.g. b[i + j * b_dim1]. */
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    afb_dim1 = *ldafb;
+    afb_offset = 1 + afb_dim1 * 1;
+    afb -= afb_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    nofact = lsame_(fact, "N");
+    equil = lsame_(fact, "E");
+    notran = lsame_(trans, "N");
+    if (nofact || equil) {
+	*(unsigned char *)equed = 'N';
+	rowequ = FALSE_;
+	colequ = FALSE_;
+    } else {
+	rowequ = lsame_(equed, "R") || lsame_(equed, 
+		"B");
+	colequ = lsame_(equed, "C") || lsame_(equed, 
+		"B");
+	smlnum = dlamch_("Safe minimum");
+	bignum = 1. / smlnum;
+    }
+
+/*     Test the input parameters. */
+/*     (*info = -k flags the k-th argument; R and C are scanned only when supplied, FACT = 'F'.) */
+    if (! nofact && ! equil && ! lsame_(fact, "F")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && ! 
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*kl < 0) {
+	*info = -4;
+    } else if (*ku < 0) {
+	*info = -5;
+    } else if (*nrhs < 0) {
+	*info = -6;
+    } else if (*ldab < *kl + *ku + 1) {
+	*info = -8;
+    } else if (*ldafb < (*kl << 1) + *ku + 1) {
+	*info = -10;
+    } else if (lsame_(fact, "F") && ! (rowequ || colequ 
+	    || lsame_(equed, "N"))) {
+	*info = -12;
+    } else {
+	if (rowequ) {
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = r__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = r__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L10: */
+	    }
+	    if (rcmin <= 0.) {
+		*info = -13;
+	    } else if (*n > 0) {
+		rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		rowcnd = 1.;
+	    }
+	}
+	if (colequ && *info == 0) {
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = c__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = c__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L20: */
+	    }
+	    if (rcmin <= 0.) {
+		*info = -14;
+	    } else if (*n > 0) {
+		colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		colcnd = 1.;
+	    }
+	}
+	if (*info == 0) {
+	    if (*ldb < f2cmax(1,*n)) {
+		*info = -16;
+	    } else if (*ldx < f2cmax(1,*n)) {
+		*info = -18;
+	    }
+	}
+    }
+/*     Report an invalid argument through XERBLA and return without computing anything. */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGBSVX", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    if (equil) {
+
+/*        Compute row and column scalings to equilibrate the matrix A. */
+
+	dgbequ_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &rowcnd,
+		 &colcnd, &amax, &infequ);
+	if (infequ == 0) {
+
+/*           Equilibrate the matrix. */
+
+	    dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &
+		    rowcnd, &colcnd, &amax, equed);
+	    rowequ = lsame_(equed, "R") || lsame_(equed,
+		     "B");
+	    colequ = lsame_(equed, "C") || lsame_(equed,
+		     "B");
+	}
+    }
+
+/*     Scale the right hand side. */
+/*     (B := diag(R)*B for TRANS = 'N', B := diag(C)*B otherwise; done in place.) */
+    if (notran) {
+	if (rowequ) {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1];
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+    } else if (colequ) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1];
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+    if (nofact || equil) {
+
+/*        Compute the LU factorization of the band matrix A. */
+/*        Copy the band of each column of AB into AFB, KL rows further down, then factor AFB. */
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__2 = j - *ku;
+	    j1 = f2cmax(i__2,1);
+/* Computing MIN */
+	    i__2 = j + *kl;
+	    j2 = f2cmin(i__2,*n);
+	    i__2 = j2 - j1 + 1;
+	    dcopy_(&i__2, &ab[*ku + 1 - j + j1 + j * ab_dim1], &c__1, &afb[*
+		    kl + *ku + 1 - j + j1 + j * afb_dim1], &c__1);
+/* L70: */
+	}
+
+	dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info);
+
+/*        Return early if the factorization reported INFO > 0 (exactly singular U). */
+
+	if (*info > 0) {
+
+/*           Compute the reciprocal pivot growth factor of the */
+/*           leading rank-deficient INFO columns of A. */
+
+	    anorm = 0.;
+	    i__1 = *info;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+		i__2 = *ku + 2 - j;
+/* Computing MIN */
+		i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1;
+		i__3 = f2cmin(i__4,i__5);
+		for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) {
+/* Computing MAX */
+		    d__2 = anorm, d__3 = (d__1 = ab[i__ + j * ab_dim1], abs(
+			    d__1));
+		    anorm = f2cmax(d__2,d__3);
+/* L80: */
+		}
+/* L90: */
+	    }
+/* Computing MIN */
+	    i__3 = *info - 1, i__2 = *kl + *ku;
+	    i__1 = f2cmin(i__3,i__2);
+/* Computing MAX */
+	    i__4 = 1, i__5 = *kl + *ku + 2 - *info;
+	    rpvgrw = dlantb_("M", "U", "N", info, &i__1, &afb[f2cmax(i__4,i__5) 
+		    + afb_dim1], ldafb, &work[1]);
+	    if (rpvgrw == 0.) {
+		rpvgrw = 1.;
+	    } else {
+		rpvgrw = anorm / rpvgrw;
+	    }
+	    work[1] = rpvgrw;
+	    *rcond = 0.;
+	    return 0;
+	}
+    }
+
+/*     Compute the norm of the matrix A and the */
+/*     reciprocal pivot growth factor RPVGRW. */
+/*     The norm is '1' for TRANS = 'N' and 'I' otherwise; the same selector is passed to DGBCON. */
+    if (notran) {
+	*(unsigned char *)norm = '1';
+    } else {
+	*(unsigned char *)norm = 'I';
+    }
+    anorm = dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]);
+    i__1 = *kl + *ku;
+    rpvgrw = dlantb_("M", "U", "N", n, &i__1, &afb[afb_offset], ldafb, &work[
+	    1]);
+    if (rpvgrw == 0.) {
+	rpvgrw = 1.;
+    } else {
+	rpvgrw = dlangb_("M", n, kl, ku, &ab[ab_offset], ldab, &work[1]) / rpvgrw;
+    }
+
+/*     Compute the reciprocal of the condition number of A. */
+
+    dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond,
+	     &work[1], &iwork[1], info);
+
+/*     Compute the solution matrix X. */
+
+    dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
+    dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[
+	    x_offset], ldx, info);
+
+/*     Use iterative refinement to improve the computed solution and */
+/*     compute error bounds and backward error estimates for it. */
+
+    dgbrfs_(trans, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], 
+	    ldafb, &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &
+	    berr[1], &work[1], &iwork[1], info);
+
+/*     Transform the solution matrix X to a solution of the original */
+/*     system. */
+/*     X is scaled by C (TRANS = 'N') or by R (otherwise); FERR is divided by COLCND or ROWCND. */
+    if (notran) {
+	if (colequ) {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__3 = *n;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1];
+/* L100: */
+		}
+/* L110: */
+	    }
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		ferr[j] /= colcnd;
+/* L120: */
+	    }
+	}
+    } else if (rowequ) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    i__3 = *n;
+	    for (i__ = 1; i__ <= i__3; ++i__) {
+		x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1];
+/* L130: */
+	    }
+/* L140: */
+	}
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    ferr[j] /= rowcnd;
+/* L150: */
+	}
+    }
+
+/*     Set INFO = N+1 if the matrix is singular to working precision. */
+
+    if (*rcond < dlamch_("Epsilon")) {
+	*info = *n + 1;
+    }
+/*     Return the reciprocal pivot growth factor in WORK(1). */
+    work[1] = rpvgrw;
+    return 0;
+
+/*     End of DGBSVX */
+
+} /* dgbsvx_ */
+
diff --git a/lapack-netlib/SRC/dgbsvxx.c b/lapack-netlib/SRC/dgbsvxx.c
new file mode 100644
index 000000000..c737f112e
--- /dev/null
+++ b/lapack-netlib/SRC/dgbsvxx.c
@@ -0,0 +1,1249 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* {re, im} pair; the <complex.h> macro of this name was #undef'd above */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision {re, im} pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* view the struct as a C99 complex object (relies on matching layout) */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* double-precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z))	/* assignable lvalue form of _pCf */
+#define pCd(z) (*_pCd(z))	/* assignable lvalue form of _pCd */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+/* Truth values stored in `logical` variables. */
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: integer aliases and one control-list struct per kind of I/O statement */
+
+typedef int flag;
+typedef int ftnlen;	/* type of string-length arguments, e.g. the (ftnlen)6 cast passed to xerbla_ */
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write (iciunit is a char pointer where cilist has an integer ciunit) */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; the pointer members are presumably result slots (TODO confirm) */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+/* One storage cell able to hold any one of the scalar types declared above. */
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+/* Describes one variable by name, address, dimension list and an integer type code. */
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+/* A named list of Vardesc pointers; nvars presumably counts the entries (TODO confirm). */
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* replaces <stdlib.h> abs() from here on; works for any arithmetic type, evaluates x twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* evaluates the selected argument twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* evaluates the selected argument twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Below: macros that take POINTERS to their operands; the {...} ones are statements, not expressions. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf() because R points to a {r,i} struct, not to a C99 complex. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference: a - b if a > b, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* log10 as log(x) times a constant (log10(e)) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* round to nearest, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| carrying the sign of b; b == 0 counts as positive */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))	/* double ** double; both operands passed by pointer */
+#define pow_si(B,E) spow_ui(*(B),*(E))	/* float base, integer exponent */
+#define pow_ri(B,E) spow_ui(*(B),*(E))	/* float base, integer exponent */
+#define pow_di(B,E) dpow_ui(*(B),*(E))	/* double base, integer exponent */
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}	/* statement: *p = a ** b, b points to an integer */
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}	/* statement: *p = a ** b, b points to an integer */
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* statement: *R = A ** B; B is a doublecomplex* and must be converted like A */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }	/* concatenate *np pieces rpp[i] (lengths rnp[i]) into lpp, truncated to llp chars and blank-padded */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) chars; no blank padding */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars, stops at a NUL in B, never pads A */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* always exits with status 0; arguments ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";	/* version tag string; not referenced elsewhere in this header */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* leaves the enclosing loop; the semicolon is part of the macro */
+#define mycycle() continue;	/* next iteration of the enclosing loop; the semicolon is part of the macro */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* myceiling, myhuge and mymaxloc expand to parenthesized expressions so they can be used in any expression context, not only as initializers. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+/* L_fp: pointer to a function returning `logical`; its argument list is left unchecked. */
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by repeated squaring; n < 0 gives (1/x)**|n|. |n| is formed in unsigned */
+/* arithmetic, so the most negative exponent no longer overflows on negation. */
+static inline float spow_ui(float x, integer n) {
+	float result = 1.0f;
+	unsigned long int bits = (n < 0) ? 0ul - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1ul) result *= x;
+		if (bits >> 1) x *= x;	/* no squaring after the top bit, as before */
+	}
+	return result;
+}
+/* x**n by repeated squaring; n < 0 gives (1/x)**|n|. |n| is formed in unsigned */
+/* arithmetic, so the most negative exponent no longer overflows on negation. */
+static inline double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	unsigned long int bits = (n < 0) ? 0ul - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1ul) result *= x;
+		if (bits >> 1) x *= x;	/* no squaring after the top bit, as before */
+	}
+	return result;
+}
+/* Complex x**n by repeated squaring; n < 0 gives (1/x)**|n|. |n| is formed in unsigned */
+/* arithmetic, so the most negative exponent no longer overflows on negation. */
+static inline _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0f;
+	unsigned long int bits = (n < 0) ? 0ul - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1ul) result *= x;
+		if (bits >> 1) x *= x;	/* no squaring after the top bit, as before */
+	}
+	return result;
+}
+/* Complex x**n by repeated squaring; n < 0 gives (1/x)**|n|. |n| is formed in unsigned */
+/* arithmetic, so the most negative exponent no longer overflows on negation. */
+static inline _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	unsigned long int bits = (n < 0) ? 0ul - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1ul) result *= x;
+		if (bits >> 1) x *= x;	/* no squaring after the top bit, as before */
+	}
+	return result;
+}
+/* Integer power x**n. A negative exponent gives 0 (the truncated reciprocal) unless x */
+/* is 1 or -1; 0 raised to a negative power still divides by zero, exactly as before. */
+static inline integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int bits;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) return x == 0 ? 1/x : 0;
+	/* Only x == -1 gets here with n < 0. Take |n| in unsigned arithmetic so that */
+	/* the most negative exponent does not overflow when its sign is dropped. */
+	bits = (n < 0) ? 0ul - (unsigned long int)n : (unsigned long int)n;
+	/* binary exponentiation, least-significant exponent bit first */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1ul) result *= x;
+		if (bits >> 1) x *= x;
+	}
+	return result;
+}
+/* Position, counted from 1 at index s, of the first largest value in w[s-1] .. w[e-1]; n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	integer best = s, k = s;	/* 1-based positions, as the caller counts them */
+	while (++k <= e)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Position, counted from 1 at index s, of the first largest value in w[s-1] .. w[e-1]; n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	integer best = s, k = s;	/* 1-based positions, as the caller counts them */
+	while (++k <= e)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* *z = sum of conj(x[i]) * y[i] over n strided elements (conjugated dot product). */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards */
+	/* from its last used element instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0f;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* *z = sum of conj(x[i]) * y[i] over n strided elements (conjugated dot product). */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards */
+	/* from its last used element instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* *z = sum of x[i] * y[i] over n strided elements (unconjugated dot product). */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards */
+	/* from its last used element instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0f;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* *z = sum of x[i] * y[i] over n strided elements (unconjugated dot product). */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards */
+	/* from its last used element instead of indexing in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> DGBSVXX computes the solution to system of linear equations A * X = B for GB matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBSVXX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbsvxx
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbsvxx
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbsvxx
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, */
+/*                           LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, */
+/*                           RCOND, RPVGRW, BERR, N_ERR_BNDS, */
+/*                           ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, */
+/*                           WORK, IWORK, INFO ) */
+
+/*       CHARACTER          EQUED, FACT, TRANS */
+/*       INTEGER            INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, */
+/*      $                   N_ERR_BNDS, KL, KU */
+/*       DOUBLE PRECISION   RCOND, RPVGRW */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */
+/*      $                   X( LDX , * ),WORK( * ) */
+/*       DOUBLE PRECISION   R( * ), C( * ), PARAMS( * ), BERR( * ), */
+/*      $                   ERR_BNDS_NORM( NRHS, * ), */
+/*      $                   ERR_BNDS_COMP( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    DGBSVXX uses the LU factorization to compute the solution to a */
+/* >    double precision system of linear equations  A * X = B,  where A is an */
+/* >    N-by-N matrix and X and B are N-by-NRHS matrices. */
+/* > */
+/* >    If requested, both normwise and maximum componentwise error bounds */
+/* >    are returned. DGBSVXX will return a solution with a tiny */
+/* >    guaranteed error (O(eps) where eps is the working machine */
+/* >    precision) unless the matrix is very ill-conditioned, in which */
+/* >    case a warning is returned. Relevant condition numbers also are */
+/* >    calculated and returned. */
+/* > */
+/* >    DGBSVXX accepts user-provided factorizations and equilibration */
+/* >    factors; see the definitions of the FACT and EQUED options. */
+/* >    Solving with refinement and using a factorization from a previous */
+/* >    DGBSVXX call will also produce a solution with either O(eps) */
+/* >    errors or warnings, but we cannot make that claim for general */
+/* >    user-provided factorizations and equilibration factors if they */
+/* >    differ from what DGBSVXX would itself produce. */
+/* > \endverbatim */
+
+/* > \par Description: */
+/*  ================= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    The following steps are performed: */
+/* > */
+/* >    1. If FACT = 'E', double precision scaling factors are computed to equilibrate */
+/* >    the system: */
+/* > */
+/* >      TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B */
+/* >      TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */
+/* >      TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */
+/* > */
+/* >    Whether or not the system will be equilibrated depends on the */
+/* >    scaling of the matrix A, but if equilibration is used, A is */
+/* >    overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */
+/* >    or diag(C)*B (if TRANS = 'T' or 'C'). */
+/* > */
+/* >    2. If FACT = 'N' or 'E', the LU decomposition is used to factor */
+/* >    the matrix A (after equilibration if FACT = 'E') as */
+/* > */
+/* >      A = P * L * U, */
+/* > */
+/* >    where P is a permutation matrix, L is a unit lower triangular */
+/* >    matrix, and U is upper triangular. */
+/* > */
+/* >    3. If some U(i,i)=0, so that U is exactly singular, then the */
+/* >    routine returns with INFO = i. Otherwise, the factored form of A */
+/* >    is used to estimate the condition number of the matrix A (see */
+/* >    argument RCOND). If the reciprocal of the condition number is less */
+/* >    than machine precision, the routine still goes on to solve for X */
+/* >    and compute error bounds as described below. */
+/* > */
+/* >    4. The system of equations is solved for X using the factored form */
+/* >    of A. */
+/* > */
+/* >    5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */
+/* >    the routine will use iterative refinement to try to get a small */
+/* >    error and error bounds.  Refinement calculates the residual to at */
+/* >    least twice the working precision. */
+/* > */
+/* >    6. If equilibration was used, the matrix X is premultiplied by */
+/* >    diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */
+/* >    that it solves the original system before equilibration. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \verbatim */
+/* >     Some optional parameters are bundled in the PARAMS array.  These */
+/* >     settings determine how refinement is performed, but often the */
+/* >     defaults are acceptable.  If the defaults are acceptable, users */
+/* >     can pass NPARAMS = 0 which prevents the source code from accessing */
+/* >     the PARAMS argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] FACT */
+/* > \verbatim */
+/* >          FACT is CHARACTER*1 */
+/* >     Specifies whether or not the factored form of the matrix A is */
+/* >     supplied on entry, and if not, whether the matrix A should be */
+/* >     equilibrated before it is factored. */
+/* >       = 'F':  On entry, AF and IPIV contain the factored form of A. */
+/* >               If EQUED is not 'N', the matrix A has been */
+/* >               equilibrated with scaling factors given by R and C. */
+/* >               A, AF, and IPIV are not modified. */
+/* >       = 'N':  The matrix A will be copied to AF and factored. */
+/* >       = 'E':  The matrix A will be equilibrated if necessary, then */
+/* >               copied to AF and factored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate Transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >     The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >     The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right hand sides, i.e., the number of columns */
+/* >     of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >     On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */
+/* >     The j-th column of A is stored in the j-th column of the */
+/* >     array AB as follows: */
+/* >     AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) */
+/* > */
+/* >     If FACT = 'F' and EQUED is not 'N', then AB must have been */
+/* >     equilibrated by the scaling factors in R and/or C.  AB is not */
+/* >     modified if FACT = 'F' or 'N', or if FACT = 'E' and */
+/* >     EQUED = 'N' on exit. */
+/* > */
+/* >     On exit, if EQUED .ne. 'N', A is scaled as follows: */
+/* >     EQUED = 'R':  A := diag(R) * A */
+/* >     EQUED = 'C':  A := A * diag(C) */
+/* >     EQUED = 'B':  A := diag(R) * A * diag(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >     The leading dimension of the array AB.  LDAB >= KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AFB */
+/* > \verbatim */
+/* >          AFB is DOUBLE PRECISION array, dimension (LDAFB,N) */
+/* >     If FACT = 'F', then AFB is an input argument and on entry */
+/* >     contains details of the LU factorization of the band matrix */
+/* >     A, as computed by DGBTRF.  U is stored as an upper triangular */
+/* >     band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */
+/* >     and the multipliers used during the factorization are stored */
+/* >     in rows KL+KU+2 to 2*KL+KU+1.  If EQUED .ne. 'N', then AFB is */
+/* >     the factored form of the equilibrated matrix A. */
+/* > */
+/* >     If FACT = 'N', then AFB is an output argument and on exit */
+/* >     returns the factors L and U from the factorization A = P*L*U */
+/* >     of the original matrix A. */
+/* > */
+/* >     If FACT = 'E', then AFB is an output argument and on exit */
+/* >     returns the factors L and U from the factorization A = P*L*U */
+/* >     of the equilibrated matrix A (see the description of A for */
+/* >     the form of the equilibrated matrix). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAFB */
+/* > \verbatim */
+/* >          LDAFB is INTEGER */
+/* >     The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     If FACT = 'F', then IPIV is an input argument and on entry */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     as computed by DGBTRF; row i of the matrix was interchanged */
+/* >     with row IPIV(i). */
+/* > */
+/* >     If FACT = 'N', then IPIV is an output argument and on exit */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     of the original matrix A. */
+/* > */
+/* >     If FACT = 'E', then IPIV is an output argument and on exit */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     of the equilibrated matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >     Specifies the form of equilibration that was done. */
+/* >       = 'N':  No equilibration (always true if FACT = 'N'). */
+/* >       = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >               diag(R). */
+/* >       = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >               by diag(C). */
+/* >       = 'B':  Both row and column equilibration, i.e., A has been */
+/* >               replaced by diag(R) * A * diag(C). */
+/* >     EQUED is an input argument if FACT = 'F'; otherwise, it is an */
+/* >     output argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (N) */
+/* >     The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >     multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >     is not accessed.  R is an input argument if FACT = 'F'; */
+/* >     otherwise, R is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'R' or 'B', each element of R must be positive. */
+/* >     If R is output, each element of R is a power of the radix. */
+/* >     If R is input, each element of R should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >     The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >     multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >     is not accessed.  C is an input argument if FACT = 'F'; */
+/* >     otherwise, C is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'C' or 'B', each element of C must be positive. */
+/* >     If C is output, each element of C is a power of the radix. */
+/* >     If C is input, each element of C should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >     On entry, the N-by-NRHS right hand side matrix B. */
+/* >     On exit, */
+/* >     if EQUED = 'N', B is not modified; */
+/* >     if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */
+/* >        diag(R)*B; */
+/* >     if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */
+/* >        overwritten by diag(C)*B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >     If INFO = 0, the N-by-NRHS solution matrix X to the original */
+/* >     system of equations.  Note that A and B are modified on exit */
+/* >     if EQUED .ne. 'N', and the solution to the equilibrated system is */
+/* >     inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */
+/* >     inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >     The leading dimension of the array X.  LDX >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RPVGRW */
+/* > \verbatim */
+/* >          RPVGRW is DOUBLE PRECISION */
+/* >     Reciprocal pivot growth.  On exit, this contains the reciprocal */
+/* >     pivot growth factor norm(A)/norm(U). The "max absolute element" */
+/* >     norm is used.  If this is much less than 1, then the stability of */
+/* >     the LU factorization of the (equilibrated) matrix A could be poor. */
+/* >     This also means that the solution X, estimated condition numbers, */
+/* >     and error bounds could be unreliable. If factorization fails with */
+/* >     0<INFO<=N, then this contains the reciprocal pivot growth factor */
+/* >     for the leading INFO columns of A.  In DGESVX, this quantity is */
+/* >     returned in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >     Componentwise relative backward error.  This is the */
+/* >     componentwise relative backward error of each solution vector X(j) */
+/* >     (i.e., the smallest relative change in any element of A or B that */
+/* >     makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_ERR_BNDS */
+/* > \verbatim */
+/* >          N_ERR_BNDS is INTEGER */
+/* >     Number of error bounds to return for each right hand side */
+/* >     and each type (normwise or componentwise).  See ERR_BNDS_NORM and */
+/* >     ERR_BNDS_COMP below. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_NORM */
+/* > \verbatim */
+/* >          ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_COMP */
+/* > \verbatim */
+/* >          ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NPARAMS */
+/* > \verbatim */
+/* >          NPARAMS is INTEGER */
+/* >     Specifies the number of parameters set in PARAMS.  If <= 0, the */
+/* >     PARAMS array is never referenced and default values are used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] PARAMS */
+/* > \verbatim */
+/* >          PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */
+/* >     Specifies algorithm parameters.  If an entry is < 0.0, then */
+/* >     that entry will be filled with default value used for that */
+/* >     parameter.  Only positions up to NPARAMS are accessed; defaults */
+/* >     are used for higher-numbered parameters. */
+/* > */
+/* >       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */
+/* >            refinement or not. */
+/* >         Default: 1.0D+0 */
+/* >            = 0.0:  No refinement is performed, and no error bounds are */
+/* >                    computed. */
+/* >            = 1.0:  Use the extra-precise refinement algorithm. */
+/* >              (other values are reserved for future use) */
+/* > */
+/* >       PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */
+/* >            computations allowed for refinement. */
+/* >         Default: 10 */
+/* >         Aggressive: Set to 100 to permit convergence using approximate */
+/* >                     factorizations or factorizations other than LU. If */
+/* >                     the factorization uses a technique other than */
+/* >                     Gaussian elimination, the guarantees in */
+/* >                     err_bnds_norm and err_bnds_comp may no longer be */
+/* >                     trustworthy. */
+/* > */
+/* >       PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */
+/* >            will attempt to find a solution with small componentwise */
+/* >            relative error in the double-precision algorithm.  Positive */
+/* >            is true, 0.0 is false. */
+/* >         Default: 1.0 (attempt componentwise convergence) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. The solution to every right-hand side is */
+/* >         guaranteed. */
+/* >       < 0:  If INFO = -i, the i-th argument had an illegal value */
+/* >       > 0 and <= N:  U(INFO,INFO) is exactly zero.  The factorization */
+/* >         has been completed, but the factor U is exactly singular, so */
+/* >         the solution and error bounds could not be computed. RCOND = 0 */
+/* >         is returned. */
+/* >       = N+J: The solution corresponding to the Jth right-hand side is */
+/* >         not guaranteed. The solutions corresponding to other right- */
+/* >         hand sides K with K > J may not be guaranteed as well, but */
+/* >         only the first such right-hand side is reported. If a small */
+/* >         componentwise error is not requested (PARAMS(3) = 0.0) then */
+/* >         the Jth right-hand side is the first with a normwise error */
+/* >         bound that is not guaranteed (the smallest J such */
+/* >         that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */
+/* >         the Jth right-hand side is the first with either a normwise or */
+/* >         componentwise error bound that is not guaranteed (the smallest */
+/* >         J such that either ERR_BNDS_NORM(J,1) = 0.0 or */
+/* >         ERR_BNDS_COMP(J,1) = 0.0). See the definition of */
+/* >         ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */
+/* >         about all of the right-hand sides check ERR_BNDS_NORM or */
+/* >         ERR_BNDS_COMP. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup doubleGBsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbsvxx_(char *fact, char *trans, integer *n, integer *
+	kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, 
+	doublereal *afb, integer *ldafb, integer *ipiv, char *equed, 
+	doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, 
+	doublereal *x, integer *ldx, doublereal *rcond, doublereal *rpvgrw, 
+	doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, 
+	doublereal *err_bnds_comp__, integer *nparams, doublereal *params, 
+	doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
+	    x_dim1, x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, 
+	    err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal amax;
+    extern doublereal dla_gbrpvgrw_(integer *, integer *, integer *, integer 
+	    *, doublereal *, integer *, doublereal *, integer *);
+    integer i__, j;
+    extern logical lsame_(char *, char *);
+    doublereal rcmin, rcmax;
+    logical equil;
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int dlaqgb_(integer *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *, char *);
+    doublereal colcnd;
+    extern /* Subroutine */ int dgbtrf_(integer *, integer *, integer *, 
+	    integer *, doublereal *, integer *, integer *, integer *);
+    logical nofact;
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    doublereal bignum;
+    extern /* Subroutine */ int dgbtrs_(char *, integer *, integer *, integer 
+	    *, integer *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    integer infequ;
+    logical colequ;
+    doublereal rowcnd;
+    logical notran;
+    doublereal smlnum;
+    logical rowequ;
+    extern /* Subroutine */ int dlascl2_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *), dgbequb_(integer *, integer *, integer *
+	    , integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *, integer *), dgbrfsx_(
+	    char *, char *, integer *, integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.  -- */
+/*     April 2012 */
+/*  Expert driver: solves a general band system A*X = B (or A**T*X = B) via */
+/*  band LU factorization, with optional equilibration and refinement. */
+/*  ================================================================== */
+/*  On an invalid argument, INFO = -i where i is the 1-based position of the */
+/*  offending argument in the parameter list above (e.g. LDB = 16, LDX = 18). */
+    /* Parameter adjustments: offset each pointer so arrays are indexed from 1 */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    afb_dim1 = *ldafb;
+    afb_offset = 1 + afb_dim1 * 1;
+    afb -= afb_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --berr;
+    --params;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    nofact = lsame_(fact, "N");
+    equil = lsame_(fact, "E");
+    notran = lsame_(trans, "N");
+    smlnum = dlamch_("Safe minimum");
+    bignum = 1. / smlnum;
+    if (nofact || equil) {
+	*(unsigned char *)equed = 'N';
+	rowequ = FALSE_;
+	colequ = FALSE_;
+    } else {
+	rowequ = lsame_(equed, "R") || lsame_(equed, 
+		"B");
+	colequ = lsame_(equed, "C") || lsame_(equed, 
+		"B");
+    }
+
+/*     Default is failure.  If an input parameter is wrong or */
+/*     factorization fails, make everything look horrible.  Only the */
+/*     pivot growth is set here, the rest is initialized in DGBRFSX. */
+
+    *rpvgrw = 0.;
+
+/*     Test the input parameters.  PARAMS is not tested until DGBRFSX. */
+
+    if (! nofact && ! equil && ! lsame_(fact, "F")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && ! 
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*kl < 0) {
+	*info = -4;
+    } else if (*ku < 0) {
+	*info = -5;
+    } else if (*nrhs < 0) {
+	*info = -6;
+    } else if (*ldab < *kl + *ku + 1) {
+	*info = -8;
+    } else if (*ldafb < (*kl << 1) + *ku + 1) {
+	*info = -10;
+    } else if (lsame_(fact, "F") && ! (rowequ || colequ 
+	    || lsame_(equed, "N"))) {
+	*info = -12;
+    } else {
+	if (rowequ) {
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = r__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = r__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L10: */
+	    }
+	    if (rcmin <= 0.) {
+		*info = -13;
+	    } else if (*n > 0) {
+		rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		rowcnd = 1.;
+	    }
+	}
+	if (colequ && *info == 0) {
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = c__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = c__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L20: */
+	    }
+	    if (rcmin <= 0.) {
+		*info = -14;
+	    } else if (*n > 0) {
+		colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		colcnd = 1.;
+	    }
+	}
+	if (*info == 0) {
+	    if (*ldb < f2cmax(1,*n)) {
+		*info = -16;	/* LDB is the 16th argument */
+	    } else if (*ldx < f2cmax(1,*n)) {
+		*info = -18;	/* LDX is the 18th argument */
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGBSVXX", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    if (equil) {
+
+/*     Compute row and column scalings to equilibrate the matrix A. */
+
+	dgbequb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &
+		rowcnd, &colcnd, &amax, &infequ);
+	if (infequ == 0) {
+
+/*     Equilibrate the matrix. */
+
+	    dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &
+		    rowcnd, &colcnd, &amax, equed);
+	    rowequ = lsame_(equed, "R") || lsame_(equed,
+		     "B");
+	    colequ = lsame_(equed, "C") || lsame_(equed,
+		     "B");
+	}
+
+/*     If the scaling factors are not applied, set them to 1.0. */
+
+	if (! rowequ) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		r__[j] = 1.;
+	    }
+	}
+	if (! colequ) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		c__[j] = 1.;
+	    }
+	}
+    }
+
+/*     Scale the right hand side. */
+
+    if (notran) {
+	if (rowequ) {
+	    dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb);
+	}
+    } else {
+	if (colequ) {
+	    dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb);
+	}
+    }
+
+    if (nofact || equil) {
+
+/*        Compute the LU factorization of A. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = (*kl << 1) + *ku + 1;
+	    for (i__ = *kl + 1; i__ <= i__2; ++i__) {
+		afb[i__ + j * afb_dim1] = ab[i__ - *kl + j * ab_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+	dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info);
+
+/*        Return early if DGBTRF reported INFO > 0. */
+
+	if (*info > 0) {
+
+/*           Pivot in column INFO is exactly 0 */
+/*           Compute the reciprocal pivot growth factor of the */
+/*           leading rank-deficient INFO columns of A. */
+
+	    *rpvgrw = dla_gbrpvgrw_(n, kl, ku, info, &ab[ab_offset], ldab, &
+		    afb[afb_offset], ldafb);
+	    return 0;
+	}
+    }
+
+/*     Compute the reciprocal pivot growth factor RPVGRW. */
+
+    *rpvgrw = dla_gbrpvgrw_(n, kl, ku, n, &ab[ab_offset], ldab, &afb[
+	    afb_offset], ldafb);
+
+/*     Compute the solution matrix X. */
+
+    dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
+    dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[
+	    x_offset], ldx, info);
+
+/*     Use iterative refinement to improve the computed solution and */
+/*     compute error bounds and backward error estimates for it. */
+
+    dgbrfsx_(trans, equed, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[
+	    afb_offset], ldafb, &ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb,
+	     &x[x_offset], ldx, rcond, &berr[1], n_err_bnds__, &
+	    err_bnds_norm__[err_bnds_norm_offset], &err_bnds_comp__[
+	    err_bnds_comp_offset], nparams, &params[1], &work[1], &iwork[1], 
+	    info);
+
+/*     Scale solutions. */
+
+    if (colequ && notran) {
+	dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx);
+    } else if (rowequ && ! notran) {
+	dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx);
+    }
+
+    return 0;
+
+/*     End of DGBSVXX */
+
+} /* dgbsvxx_ */
+
diff --git a/lapack-netlib/SRC/dgbtf2.c b/lapack-netlib/SRC/dgbtf2.c
new file mode 100644
index 000000000..37c2fea15
--- /dev/null
+++ b/lapack-netlib/SRC/dgbtf2.c
@@ -0,0 +1,698 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* R = A**B; B is a doublecomplex pointer like A, so it is converted with Cd() instead of being dereferenced raw */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* myceiling/myhuge/mymaxloc expand to parenthesised expressions so their value is usable anywhere; a brace-wrapped form such as {ceil(w)} is only accepted as a braced scalar initialiser. */
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static float spow_ui(float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static double dpow_ui(double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Complex x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Complex x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex double acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Integer power x**n.  For n <= 0: 1 when n == 0 or x == 1, 0 when |x| > 1, (-1)**|n| when x == -1. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* x == 0 with n < 0 still evaluates 1/x, i.e. divides by zero exactly as before. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+	}
+	/* Here n > 0, or x == -1 with n < 0.  |n| is taken in unsigned arithmetic because */
+	/* `n = -n` is signed overflow (UB) when n is the most negative integer. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* z = sum over i < n of conj(x_i) * y_i, the conjugated dot product (BLAS CDOTC); result stored in *z. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotc = zdotc + conjg(x(i)) * y(i) */
+		zdotc += conjf(Cf(&x[kx + i*incx])) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+/* z = sum over i < n of conj(x_i) * y_i, the conjugated dot product (BLAS ZDOTC); result stored in *z. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotc = zdotc + dconjg(x(i)) * y(i) */
+		zdotc += conj(Cd(&x[kx + i*incx])) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+/* z = sum over i < n of x_i * y_i, the unconjugated dot product (BLAS CDOTU); result stored in *z. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float zdotu = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotu = zdotu + x(i) * y(i) */
+		zdotu += Cf(&x[kx + i*incx]) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotu;
+}
+/* z = sum over i < n of x_i * y_i, the unconjugated dot product (BLAS ZDOTU); result stored in *z. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double zdotu = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotu = zdotu + x(i) * y(i) */
+		zdotu += Cd(&x[kx + i*incx]) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublereal c_b9 = -1.;
+
+/* > \brief \b DGBTF2 computes the LU factorization of a general band matrix using the unblocked version of th
+e algorithm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBTF2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtf2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtf2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtf2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) */
+
+/*       INTEGER            INFO, KL, KU, LDAB, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBTF2 computes an LU factorization of a real m-by-n band matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          On entry, the matrix A in band storage, in rows KL+1 to */
+/* >          2*KL+KU+1; rows 1 to KL of the array need not be set. */
+/* >          The j-th column of A is stored in the j-th column of the */
+/* >          array AB as follows: */
+/* >          AB(kl+ku+1+i-j,j) = A(i,j) for f2cmax(1,j-ku)<=i<=f2cmin(m,j+kl) */
+/* > */
+/* >          On exit, details of the factorization: U is stored as an */
+/* >          upper triangular band matrix with KL+KU superdiagonals in */
+/* >          rows 1 to KL+KU+1, and the multipliers used during the */
+/* >          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (f2cmin(M,N)) */
+/* >          The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */
+/* >               has been completed, but the factor U is exactly */
+/* >               singular, and division by zero will occur if it is used */
+/* >               to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The band storage scheme is illustrated by the following example, when */
+/* >  M = N = 6, KL = 2, KU = 1: */
+/* > */
+/* >  On entry:                       On exit: */
+/* > */
+/* >      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
+/* >      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
+/* >      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
+/* >     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
+/* >     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
+/* >     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
+/* > */
+/* >  Array elements marked * are not used by the routine; elements marked */
+/* >  + need not be set on entry, but are required by the routine to store */
+/* >  elements of U, because of fill-in resulting from the row */
+/* >  interchanges. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgbtf2_(integer *m, integer *n, integer *kl, integer *ku,
+	 doublereal *ab, integer *ldab, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer i__, j;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *), dswap_(integer *, doublereal *, integer *, doublereal 
+	    *, integer *);
+    integer km, jp, ju, kv;
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+/*  Purpose: in-place LU factorization of the band matrix held in AB, using */
+/*  partial pivoting with row interchanges (unblocked, one column at a time). */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*  The C return value is always 0; the outcome is reported through *info: */
+/*  0 = success, -i = argument i is invalid (xerbla_ is called), +j = first zero pivot. */
+/*  ===================================================================== */
+
+
+/*     KV is the number of superdiagonals in the factor U, allowing for */
+/*     fill-in. */
+
+    /* Parameter adjustments: rebase ab and ipiv so that 1-based indices can be used */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    --ipiv;
+
+    /* Function Body */
+    kv = *ku + *kl;
+
+/*     Test the input parameters. */
+
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*ldab < *kl + kv + 1) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info); /* xerbla_ receives the argument position as a positive number */
+	xerbla_("DGBTF2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Gaussian elimination with partial pivoting */
+
+/*     Set fill-in elements in columns KU+2 to KV to zero. */
+
+    i__1 = f2cmin(kv,*n);
+    for (j = *ku + 2; j <= i__1; ++j) {
+	i__2 = *kl;
+	for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
+	    ab[i__ + j * ab_dim1] = 0.;
+/* L10: */
+	}
+/* L20: */
+    }
+
+/*     JU is the index of the last column affected by the current stage */
+/*     of the factorization. */
+
+    ju = 1;
+
+    i__1 = f2cmin(*m,*n);
+    for (j = 1; j <= i__1; ++j) {
+
+/*        Set fill-in elements in column J+KV to zero. */
+
+	if (j + kv <= *n) {
+	    i__2 = *kl;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		ab[i__ + (j + kv) * ab_dim1] = 0.;
+/* L30: */
+	    }
+	}
+
+/*        Find pivot and test for singularity. KM is the number of */
+/*        subdiagonal elements in the current column. */
+
+/* Computing MIN: KM = min(KL, M-J) */
+	i__2 = *kl, i__3 = *m - j;
+	km = f2cmin(i__2,i__3);
+	i__2 = km + 1;
+	jp = idamax_(&i__2, &ab[kv + 1 + j * ab_dim1], &c__1);
+	ipiv[j] = jp + j - 1; /* jp counts from the diagonal entry (jp == 1 means no interchange) */
+	if (ab[kv + jp + j * ab_dim1] != 0.) {
+/* Computing MAX of JU and MIN(J+KU+JP-1, N): */
+/* JU only ever grows and is capped at the last column N. */
+	    i__4 = j + *ku + jp - 1;
+	    i__2 = ju, i__3 = f2cmin(i__4,*n);
+	    ju = f2cmax(i__2,i__3);
+
+/*           Apply interchange to columns J to JU. */
+
+	    if (jp != 1) {
+		i__2 = ju - j + 1;
+		i__3 = *ldab - 1; /* in band storage, stepping one column along a matrix row moves ldab-1 entries */
+		i__4 = *ldab - 1;
+		dswap_(&i__2, &ab[kv + jp + j * ab_dim1], &i__3, &ab[kv + 1 + 
+			j * ab_dim1], &i__4);
+	    }
+
+	    if (km > 0) {
+
+/*              Compute multipliers. */
+
+		d__1 = 1. / ab[kv + 1 + j * ab_dim1];
+		dscal_(&km, &d__1, &ab[kv + 2 + j * ab_dim1], &c__1);
+
+/*              Update trailing submatrix within the band. */
+
+		if (ju > j) {
+		    i__2 = ju - j;
+		    i__3 = *ldab - 1;
+		    i__4 = *ldab - 1;
+		    dger_(&km, &i__2, &c_b9, &ab[kv + 2 + j * ab_dim1], &c__1,
+			     &ab[kv + (j + 1) * ab_dim1], &i__3, &ab[kv + 1 + 
+			    (j + 1) * ab_dim1], &i__4);
+		}
+	    }
+	} else {
+
+/*           If pivot is zero, set INFO to the index of the pivot */
+/*           unless a zero pivot has already been found. */
+
+	    if (*info == 0) {
+		*info = j;
+	    }
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of DGBTF2 */
+
+} /* dgbtf2_ */
+
diff --git a/lapack-netlib/SRC/dgbtrf.c b/lapack-netlib/SRC/dgbtrf.c
new file mode 100644
index 000000000..a9bf6eed6
--- /dev/null
+++ b/lapack-netlib/SRC/dgbtrf.c
@@ -0,0 +1,1021 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* R = A**B; B is a doublecomplex pointer like A, so it is converted with Cd() instead of being dereferenced raw */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* myceiling/myhuge/mymaxloc expand to parenthesised expressions so their value is usable anywhere; a brace-wrapped form such as {ceil(w)} is only accepted as a braced scalar initialiser. */
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static float spow_ui(float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static double dpow_ui(double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Complex x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Complex x**n by binary (square-and-multiply) powering; n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: `n = -n` overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex double acc = 1.0;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Integer power x**n.  For n <= 0: 1 when n == 0 or x == 1, 0 when |x| > 1, (-1)**|n| when x == -1. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* x == 0 with n < 0 still evaluates 1/x, i.e. divides by zero exactly as before. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+	}
+	/* Here n > 0, or x == -1 with n < 0.  |n| is taken in unsigned arithmetic because */
+	/* `n = -n` is signed overflow (UB) when n is the most negative integer. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	while (u != 0) {
+		if (u & 01) acc *= x;	/* current bit set: fold this power of x into the result */
+		if (u >>= 1) x *= x;	/* more bits remain: advance to the next squared power */
+	}
+	return acc;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* z = sum over i < n of conj(x_i) * y_i, the conjugated dot product (BLAS CDOTC); result stored in *z. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotc = zdotc + conjg(x(i)) * y(i) */
+		zdotc += conjf(Cf(&x[kx + i*incx])) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+/* z = sum over i < n of conj(x_i) * y_i, the conjugated dot product (BLAS ZDOTC); result stored in *z. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotc = zdotc + dconjg(x(i)) * y(i) */
+		zdotc += conj(Cd(&x[kx + i*incx])) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+/* z = sum over i < n of x_i * y_i, the unconjugated dot product (BLAS CDOTU); result stored in *z. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float zdotu = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotu = zdotu + x(i) * y(i) */
+		zdotu += Cf(&x[kx + i*incx]) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotu;
+}
+/* z = sum over i < n of x_i * y_i, the unconjugated dot product (BLAS ZDOTU); result stored in *z. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS traverses a negatively strided vector backwards from its far end: */
+	/* logical element i sits at offset (1-n)*inc + i*inc.  Starting from offset 0 with a */
+	/* negative increment would index in front of the array. */
+	integer kx = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer ky = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double zdotu = 0.0;
+	for (i = 0; i < n; i++) {	/* zdotu = zdotu + x(i) * y(i) */
+		zdotu += Cd(&x[kx + i*incx]) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__65 = 65;
+static doublereal c_b18 = -1.;
+static doublereal c_b31 = 1.;
+
+/* > \brief \b DGBTRF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBTRF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrf.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) */
+
+/*       INTEGER            INFO, KL, KU, LDAB, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBTRF computes an LU factorization of a real m-by-n band matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > This is the blocked version of the algorithm, calling Level 3 BLAS. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          On entry, the matrix A in band storage, in rows KL+1 to */
+/* >          2*KL+KU+1; rows 1 to KL of the array need not be set. */
+/* >          The j-th column of A is stored in the j-th column of the */
+/* >          array AB as follows: */
+/* >          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */
+/* > */
+/* >          On exit, details of the factorization: U is stored as an */
+/* >          upper triangular band matrix with KL+KU superdiagonals in */
+/* >          rows 1 to KL+KU+1, and the multipliers used during the */
+/* >          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (min(M,N)) */
+/* >          The pivot indices; for 1 <= i <= min(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */
+/* >               has been completed, but the factor U is exactly */
+/* >               singular, and division by zero will occur if it is used */
+/* >               to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The band storage scheme is illustrated by the following example, when */
+/* >  M = N = 6, KL = 2, KU = 1: */
+/* > */
+/* >  On entry:                       On exit: */
+/* > */
+/* >      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
+/* >      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
+/* >      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
+/* >     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
+/* >     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
+/* >     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
+/* > */
+/* >  Array elements marked * are not used by the routine; elements marked */
+/* >  + need not be set on entry, but are required by the routine to store */
+/* >  elements of U because of fill-in resulting from the row interchanges. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku,
+	 doublereal *ab, integer *ldab, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+    doublereal d__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    doublereal temp;
+    integer i__, j;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *), dgemm_(char *, char *, integer *, integer *, integer *
+	    , doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *), dcopy_(
+	    integer *, doublereal *, integer *, doublereal *, integer *), 
+	    dswap_(integer *, doublereal *, integer *, doublereal *, integer *
+	    );
+    doublereal work13[4160]	/* was [65][64] */, work31[4160]	/* 
+	    was [65][64] */;
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer i2, i3, j2, j3, k2;
+    extern /* Subroutine */ int dgbtf2_(integer *, integer *, integer *, 
+	    integer *, doublereal *, integer *, integer *, integer *);
+    integer jb, nb, ii, jj, jm, ip, jp, km, ju, kv;
+    extern integer idamax_(integer *, doublereal *, integer *);
+    integer nw;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, 
+	    integer *, integer *, integer *, integer *);
+
+/*  dgbtrf_ factorizes the band matrix held in ab in place, records the */
+/*  pivot rows in ipiv and the status in *info; the C return value is always 0. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     KV is the number of superdiagonals in the factor U, allowing for */
+/*     fill-in */
+
+    /* Parameter adjustments */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;	/* from here on ab[i + j*ab_dim1] is the 1-based element (i,j) */
+    --ipiv;	/* from here on ipiv[1] is the first pivot entry */
+
+    /* Function Body */
+    kv = *ku + *kl;
+
+/*     Test the input parameters. */
+
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*ldab < *kl + kv + 1) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);	/* positive position of the offending argument */
+	xerbla_("DGBTRF", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment */
+
+    nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku, (ftnlen)6, (ftnlen)1);
+
+/*     The block size must not exceed the limit set by the size of the */
+/*     local arrays WORK13 and WORK31. */
+
+    nb = f2cmin(nb,64);
+
+    if (nb <= 1 || nb > *kl) {
+
+/*        Use unblocked code */
+
+	dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code */
+
+/*        Zero the superdiagonal elements of the work array WORK13 */
+
+	i__1 = nb;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j - 1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work13[i__ + j * 65 - 66] = 0.;	/* 1-based (i,j) with column stride 65 */
+/* L10: */
+	    }
+/* L20: */
+	}
+
+/*        Zero the subdiagonal elements of the work array WORK31 */
+
+	i__1 = nb;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = nb;
+	    for (i__ = j + 1; i__ <= i__2; ++i__) {
+		work31[i__ + j * 65 - 66] = 0.;	/* 1-based (i,j) with column stride 65 */
+/* L30: */
+	    }
+/* L40: */
+	}
+
+/*        Gaussian elimination with partial pivoting */
+
+/*        Set fill-in elements in columns KU+2 to KV to zero */
+
+	i__1 = f2cmin(kv,*n);
+	for (j = *ku + 2; j <= i__1; ++j) {
+	    i__2 = *kl;
+	    for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
+		ab[i__ + j * ab_dim1] = 0.;
+/* L50: */
+	    }
+/* L60: */
+	}
+
+/*        JU is the index of the last column affected by the current */
+/*        stage of the factorization */
+
+	ju = 1;
+/*        Outer loop: j advances by nb over the first min(m,n) columns */
+	i__1 = f2cmin(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = f2cmin(*m,*n) - j + 1;
+	    jb = f2cmin(i__3,i__4);
+
+/*           The active part of the matrix is partitioned */
+
+/*              A11   A12   A13 */
+/*              A21   A22   A23 */
+/*              A31   A32   A33 */
+
+/*           Here A11, A21 and A31 denote the current block of JB columns */
+/*           which is about to be factorized. The number of rows in the */
+/*           partitioning are JB, I2, I3 respectively, and the numbers */
+/*           of columns are JB, J2, J3. The superdiagonal elements of A13 */
+/*           and the subdiagonal elements of A31 lie outside the band. */
+
+/* Computing MIN */
+	    i__3 = *kl - jb, i__4 = *m - j - jb + 1;
+	    i2 = f2cmin(i__3,i__4);
+/* Computing MIN */
+	    i__3 = jb, i__4 = *m - j - *kl + 1;
+	    i3 = f2cmin(i__3,i__4);
+
+/*           J2 and J3 are computed after JU has been updated. */
+
+/*           Factorize the current block of JB columns */
+
+	    i__3 = j + jb - 1;
+	    for (jj = j; jj <= i__3; ++jj) {
+
+/*              Set fill-in elements in column JJ+KV to zero */
+
+		if (jj + kv <= *n) {
+		    i__4 = *kl;
+		    for (i__ = 1; i__ <= i__4; ++i__) {
+			ab[i__ + (jj + kv) * ab_dim1] = 0.;
+/* L70: */
+		    }
+		}
+
+/*              Find pivot and test for singularity. KM is the number of */
+/*              subdiagonal elements in the current column. */
+
+/* Computing MIN */
+		i__4 = *kl, i__5 = *m - jj;
+		km = f2cmin(i__4,i__5);
+		i__4 = km + 1;
+		jp = idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);
+		ipiv[jj] = jp + jj - j;	/* stored relative to the block start; j - 1 is added after the block */
+		if (ab[kv + jp + jj * ab_dim1] != 0.) {
+/* Computing MAX */
+/* Computing MIN */
+		    i__6 = jj + *ku + jp - 1;
+		    i__4 = ju, i__5 = f2cmin(i__6,*n);
+		    ju = f2cmax(i__4,i__5);
+		    if (jp != 1) {
+
+/*                    Apply interchange to columns J to J+JB-1 */
+
+			if (jp + jj - 1 < j + *kl) {
+
+			    i__4 = *ldab - 1;
+			    i__5 = *ldab - 1;
+			    dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &
+				    i__4, &ab[kv + jp + jj - j + j * ab_dim1],
+				     &i__5);
+			} else {
+
+/*                       The interchange affects columns J to JJ-1 of A31 */
+/*                       which are stored in the work array WORK31 */
+
+			    i__4 = jj - j;
+			    i__5 = *ldab - 1;
+			    dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], 
+				    &i__5, &work31[jp + jj - j - *kl - 1], &
+				    c__65);
+			    i__4 = j + jb - jj;
+			    i__5 = *ldab - 1;
+			    i__6 = *ldab - 1;
+			    dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &
+				    ab[kv + jp + jj * ab_dim1], &i__6);
+			}
+		    }
+
+/*                 Compute multipliers */
+
+		    d__1 = 1. / ab[kv + 1 + jj * ab_dim1];
+		    dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1);
+
+/*                 Update trailing submatrix within the band and within */
+/*                 the current block. JM is the index of the last column */
+/*                 which needs to be updated. */
+
+/* Computing MIN */
+		    i__4 = ju, i__5 = j + jb - 1;
+		    jm = f2cmin(i__4,i__5);
+		    if (jm > jj) {
+			i__4 = jm - jj;
+			i__5 = *ldab - 1;
+			i__6 = *ldab - 1;
+			dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], 
+				&c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &
+				ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);
+		    }
+		} else {
+
+/*                 If pivot is zero, set INFO to the index of the pivot */
+/*                 unless a zero pivot has already been found. */
+
+		    if (*info == 0) {
+			*info = jj;
+		    }
+		}
+
+/*              Copy current column of A31 into the work array WORK31 */
+
+/* Computing MIN */
+		i__4 = jj - j + 1;
+		nw = f2cmin(i__4,i3);
+		if (nw > 0) {
+		    dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &
+			    c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
+		}
+/* L80: */
+	    }
+	    if (j + jb <= *n) {
+
+/*              Apply the row interchanges to the other blocks. */
+
+/* Computing MIN */
+		i__3 = ju - j + 1;
+		j2 = f2cmin(i__3,kv) - jb;
+/* Computing MAX */
+		i__3 = 0, i__4 = ju - j - kv + 1;
+		j3 = f2cmax(i__3,i__4);
+
+/*              Use DLASWP to apply the row interchanges to A12, A22, and */
+/*              A32. */
+
+		i__3 = *ldab - 1;
+		dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &
+			c__1, &jb, &ipiv[j], &c__1);
+
+/*              Adjust the pivot indices. */
+
+		i__3 = j + jb - 1;
+		for (i__ = j; i__ <= i__3; ++i__) {
+		    ipiv[i__] = ipiv[i__] + j - 1;
+/* L90: */
+		}
+
+/*              Apply the row interchanges to A13, A23, and A33 */
+/*              columnwise. */
+
+		k2 = j - 1 + jb + j2;
+		i__3 = j3;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    jj = k2 + i__;
+		    i__4 = j + jb - 1;
+		    for (ii = j + i__ - 1; ii <= i__4; ++ii) {
+			ip = ipiv[ii];
+			if (ip != ii) {
+			    temp = ab[kv + 1 + ii - jj + jj * ab_dim1];
+			    ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + 
+				    ip - jj + jj * ab_dim1];
+			    ab[kv + 1 + ip - jj + jj * ab_dim1] = temp;
+			}
+/* L100: */
+		    }
+/* L110: */
+		}
+
+/*              Update the relevant part of the trailing submatrix */
+
+		if (j2 > 0) {
+
+/*                 Update A12 */
+
+		    i__3 = *ldab - 1;
+		    i__4 = *ldab - 1;
+		    dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, 
+			    &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv 
+			    + 1 - jb + (j + jb) * ab_dim1], &i__4);
+
+		    if (i2 > 0) {
+
+/*                    Update A22 */
+
+			i__3 = *ldab - 1;
+			i__4 = *ldab - 1;
+			i__5 = *ldab - 1;
+			dgemm_("No transpose", "No transpose", &i2, &j2, &jb, 
+				&c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
+				 &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4,
+				 &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], &
+				i__5);
+		    }
+
+		    if (i3 > 0) {
+
+/*                    Update A32 */
+
+			i__3 = *ldab - 1;
+			i__4 = *ldab - 1;
+			dgemm_("No transpose", "No transpose", &i3, &j2, &jb, 
+				&c_b18, work31, &c__65, &ab[kv + 1 - jb + (j 
+				+ jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl 
+				+ 1 - jb + (j + jb) * ab_dim1], &i__4);
+		    }
+		}
+
+		if (j3 > 0) {
+
+/*                 Copy the lower triangle of A13 into the work array */
+/*                 WORK13 */
+
+		    i__3 = j3;
+		    for (jj = 1; jj <= i__3; ++jj) {
+			i__4 = jb;
+			for (ii = jj; ii <= i__4; ++ii) {
+			    work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj 
+				    + j + kv - 1) * ab_dim1];
+/* L120: */
+			}
+/* L130: */
+		    }
+
+/*                 Update A13 in the work array */
+
+		    i__3 = *ldab - 1;
+		    dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, 
+			    &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, 
+			    &c__65);
+
+		    if (i2 > 0) {
+
+/*                    Update A23 */
+
+			i__3 = *ldab - 1;
+			i__4 = *ldab - 1;
+			dgemm_("No transpose", "No transpose", &i2, &j3, &jb, 
+				&c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
+				 work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv)
+				 * ab_dim1], &i__4);
+		    }
+
+		    if (i3 > 0) {
+
+/*                    Update A33 */
+
+			i__3 = *ldab - 1;
+			dgemm_("No transpose", "No transpose", &i3, &j3, &jb, 
+				&c_b18, work31, &c__65, work13, &c__65, &
+				c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], &
+				i__3);
+		    }
+
+/*                 Copy the lower triangle of A13 back into place */
+
+		    i__3 = j3;
+		    for (jj = 1; jj <= i__3; ++jj) {
+			i__4 = jb;
+			for (ii = jj; ii <= i__4; ++ii) {
+			    ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = 
+				    work13[ii + jj * 65 - 66];
+/* L140: */
+			}
+/* L150: */
+		    }
+		}
+	    } else {
+
+/*              Adjust the pivot indices. */
+
+		i__3 = j + jb - 1;
+		for (i__ = j; i__ <= i__3; ++i__) {
+		    ipiv[i__] = ipiv[i__] + j - 1;
+/* L160: */
+		}
+	    }
+
+/*           Partially undo the interchanges in the current block to */
+/*           restore the upper triangular form of A31 and copy the upper */
+/*           triangle of A31 back into place */
+
+	    i__3 = j;
+	    for (jj = j + jb - 1; jj >= i__3; --jj) {
+		jp = ipiv[jj] - jj + 1;
+		if (jp != 1) {
+
+/*                 Apply interchange to columns J to JJ-1 */
+
+		    if (jp + jj - 1 < j + *kl) {
+
+/*                    The interchange does not affect A31 */
+
+			i__4 = jj - j;
+			i__5 = *ldab - 1;
+			i__6 = *ldab - 1;
+			dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
+				i__5, &ab[kv + jp + jj - j + j * ab_dim1], &
+				i__6);
+		    } else {
+
+/*                    The interchange does affect A31 */
+
+			i__4 = jj - j;
+			i__5 = *ldab - 1;
+			dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
+				i__5, &work31[jp + jj - j - *kl - 1], &c__65);
+		    }
+		}
+
+/*              Copy the current column of A31 back into place */
+
+/* Computing MIN */
+		i__4 = i3, i__5 = jj - j + 1;
+		nw = f2cmin(i__4,i__5);
+		if (nw > 0) {
+		    dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[
+			    kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);
+		}
+/* L170: */
+	    }
+/* L180: */
+	}
+    }
+
+    return 0;
+
+/*     End of DGBTRF */
+
+} /* dgbtrf_ */
+
diff --git a/lapack-netlib/SRC/dgbtrs.c b/lapack-netlib/SRC/dgbtrs.c
new file mode 100644
index 000000000..c2045bef3
--- /dev/null
+++ b/lapack-netlib/SRC/dgbtrs.c
@@ -0,0 +1,686 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* integer type used by all translated routines in this file */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single-precision complex as a {re, im} pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex as a {re, im} pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* NOTE(review): pointer cast assumes the struct and _Complex float share layout */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* NOTE(review): same layout assumption for double */
+#define pCf(z) (*_pCf(z))	/* assignable C99-complex view of a complex struct */
+#define pCd(z) (*_pCd(z))	/* assignable C99-complex view of a doublecomplex struct */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)	/* truth values for the logical typedefs */
+#define FALSE_ (0)
+
+/* Extern defaults to plain extern unless the including code predefines it */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O descriptor types (none of them is referenced by the routine code visible in this patch section) */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the string-length arguments, cf. the (ftnlen) casts in generated calls */
+typedef int ftnint;
+
+/* cilist: descriptor for external READ/WRITE; field meanings below are presumed from the names - confirm against libf2c */
+typedef struct
+{	flag cierr;	/* presumably the error-handling flag */
+	ftnint ciunit;	/* presumably the unit number */
+	flag ciend;	/* presumably the end-of-file handling flag */
+	char *cifmt;	/* presumably the format string */
+	ftnint cirec;	/* presumably the record number */
+} cilist;
+
+/* icilist: descriptor for internal READ/WRITE; field meanings are presumed from the names - confirm against libf2c */
+typedef struct
+{	flag icierr;
+	char *iciunit;	/* a char pointer here, unlike the ftnint unit in cilist */
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;	/* presumably the record length */
+	ftnint icirnum;	/* presumably the number of records */
+} icilist;
+
+/* olist: descriptor for OPEN; each member presumably mirrors one OPEN specifier - confirm against libf2c */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;	/* paired with the length member ofnmlen */
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: descriptor for CLOSE; member meanings presumed from the names - confirm against libf2c */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: descriptor shared by REWIND, BACKSPACE and ENDFILE */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: descriptor for INQUIRE; most char* members are paired with a *len member */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): ftnint where sibling length members use ftnlen; both are int here */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;	/* one member per scalar type declared by this header */
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;	/* allows the union to be named without the union keyword */
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;	/* encoding not visible here - presumably an f2c type code, confirm */
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus an array of Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;	/* presumably the number of entries in vars - confirm */
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* evaluates x twice; overrides abs from <stdlib.h> */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* arguments may be evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* arguments may be evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): calls double-precision ccos, unlike the f-suffixed neighbours */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* goes through double-precision conj, then narrows */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* goes through double-precision cimag */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* rounds to nearest, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| carrying the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}	/* NOTE(review): *(B) only compiles for a pointer to an arithmetic type; a doublecomplex* B would need Cd(B) - confirm callers */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* stops at NUL or min(C,D); the rest of A is left untouched */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";	/* version tag string; not referenced in the visible code */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* loop exit; the expansion already ends in ';' */
+#define mycycle() continue;	/* next loop iteration; the expansion already ends in ';' */
+#define myceiling(w) (ceil(w))	/* parenthesized so it works as an expression, not only as a braced initializer */
+#define myhuge(w) (HUGE_VAL)	/* argument is ignored */
+/* mymaxloc: s and e arrive by address and are dereferenced for dmaxloc_; n is forwarded unchanged */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++: pointer to a variadic function returning logical */
+#else
+typedef logical (*L_fp)();	/* C: pointer to a function with unspecified parameters returning logical */
+#endif
+
+/* x**n for an integer exponent by repeated squaring; n < 0 works on 1/x. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0;
+	unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; bits != 0; bits >>= 1) {
+		if (bits & 01) acc *= x;	/* this bit of n contributes the current power */
+		if (bits >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return acc;
+}
+/* x**n for an integer exponent by repeated squaring; n < 0 works on 1/x. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; bits != 0; bits >>= 1) {
+		if (bits & 01) acc *= x;	/* this bit of n contributes the current power */
+		if (bits >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return acc;
+}
+/* Complex x**n for an integer exponent by repeated squaring; n < 0 works on 1/x. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; bits != 0; bits >>= 1) {
+		if (bits & 01) acc *= x;	/* this bit of n contributes the current power */
+		if (bits >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return acc;
+}
+/* Complex x**n for an integer exponent by repeated squaring; n < 0 works on 1/x. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; bits != 0; bits >>= 1) {
+		if (bits & 01) acc *= x;	/* this bit of n contributes the current power */
+		if (bits >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return acc;
+}
+/* Integer power x**n; non-positive exponents are special-cased up front. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* x == 0 still evaluates 1/x (integer division by zero); other x give 0 */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x == -1: carry on with the positive exponent */
+	}
+	/* square-and-multiply over the bits of n */
+	for (bits = n; ; ) {
+		if (bits & 01) result *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Conjugated dot product *z = sum conj(x(i))*y(i), single-precision complex; a
+   negative increment walks that vector backwards from its far end (BLAS rule). */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start offset into x */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;	/* start offset into y */
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product *z = sum conj(x(i))*y(i), double-precision complex; a
+   negative increment walks that vector backwards from its far end (BLAS rule). */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start offset into x */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;	/* start offset into y */
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product *z = sum x(i)*y(i), single-precision complex; a
+   negative increment walks that vector backwards from its far end (BLAS rule). */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start offset into x */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;	/* start offset into y */
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product *z = sum x(i)*y(i), double-precision complex; a
+   negative increment walks that vector backwards from its far end (BLAS rule). */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* start offset into x */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;	/* start offset into y */
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static doublereal c_b7 = -1.;	/* constant -1.0; presumably passed by address inside dgbtrs_ (body not visible here) */
+static integer c__1 = 1;	/* constant 1; presumably a by-address stride/count argument - confirm in dgbtrs_ */
+static doublereal c_b23 = 1.;	/* constant 1.0; presumably passed by address inside dgbtrs_ - confirm */
+
+/* > \brief \b DGBTRS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGBTRS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrs.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrs.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrs.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, */
+/*                          INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGBTRS solves a system of linear equations */
+/* >    A * X = B  or  A**T * X = B */
+/* > with a general band matrix A using the LU factorization computed */
+/* > by DGBTRF. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations. */
+/* >          = 'N':  A * X = B  (No transpose) */
+/* >          = 'T':  A**T* X = B  (Transpose) */
+/* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KL */
+/* > \verbatim */
+/* >          KL is INTEGER */
+/* >          The number of subdiagonals within the band of A.  KL >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] KU */
+/* > \verbatim */
+/* >          KU is INTEGER */
+/* >          The number of superdiagonals within the band of A.  KU >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AB */
+/* > \verbatim */
+/* >          AB is DOUBLE PRECISION array, dimension (LDAB,N) */
+/* >          Details of the LU factorization of the band matrix A, as */
+/* >          computed by DGBTRF.  U is stored as an upper triangular band */
+/* >          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
+/* >          the multipliers used during the factorization are stored in */
+/* >          rows KL+KU+2 to 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAB */
+/* > \verbatim */
+/* >          LDAB is INTEGER */
+/* >          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices; for 1 <= i <= N, row i of the matrix was */
+/* >          interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the right hand side matrix B. */
+/* >          On exit, the solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGBcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgbtrs_(char *trans, integer *n, integer *kl, integer *
+	ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, 
+	doublereal *b, integer *ldb, integer *info)
+{
+    /* f2c-generated locals: leading dimensions, 1-based offsets, temporaries */
+    integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer i__, j, l;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *), dswap_(integer *, 
+	    doublereal *, integer *, doublereal *, integer *), dtbsv_(char *, 
+	    char *, char *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    logical lnoti;
+    integer kd, lm;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical notran;
+/*  Solves A*X = B or A**T*X = B in place in B, using the banded LU factors */
+/*  in AB and the pivots in IPIV.  Always returns 0; status is left in INFO. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*  c_b7, c_b23 and c__1 are file-scope constants defined before this routine */
+/*  (presumably -1.0, 1.0 and 1 as in reference DGBTRS -- TODO confirm). */
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments: rebase ab, ipiv and b so 1-based indices work */
+    ab_dim1 = *ldab;
+    ab_offset = 1 + ab_dim1 * 1;
+    ab -= ab_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+
+    /* Function Body: the first invalid argument sets INFO to minus its position */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*kl < 0) {
+	*info = -3;
+    } else if (*ku < 0) {
+	*info = -4;
+    } else if (*nrhs < 0) {
+	*info = -5;
+    } else if (*ldab < (*kl << 1) + *ku + 1) { /* LDAB < 2*KL+KU+1 */
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info); /* xerbla_ receives the positive argument position */
+	xerbla_("DGBTRS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	return 0;
+    }
+
+    kd = *ku + *kl + 1; /* last row of the U part of AB; multipliers start at row kd+1 */
+    lnoti = *kl > 0; /* with no subdiagonals both L passes below are skipped */
+
+    if (notran) {
+
+/*        Solve  A*X = B. */
+
+/*        Solve L*X = B, overwriting B with X. */
+
+/*        L is represented as a product of permutations and unit lower */
+/*        triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), */
+/*        where each transformation L(i) is a rank-one modification of */
+/*        the identity matrix. */
+
+	if (lnoti) {
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN: lm = min(KL, N-j), the number of multipliers in column j */
+		i__2 = *kl, i__3 = *n - j;
+		lm = f2cmin(i__2,i__3);
+		l = ipiv[j];
+		if (l != j) { /* apply the row interchange to every right-hand side */
+		    dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb);
+		}
+		dger_(&lm, nrhs, &c_b7, &ab[kd + 1 + j * ab_dim1], &c__1, &b[
+			j + b_dim1], ldb, &b[j + 1 + b_dim1], ldb);
+/* L10: end of the column loop; rows j+1..j+lm of B were updated from row j */
+	    }
+	}
+
+	i__1 = *nrhs;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Solve U*X = B, overwriting B with X. */
+
+	    i__2 = *kl + *ku; /* bandwidth passed to dtbsv_ for U */
+	    dtbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[
+		    ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1);
+/* L20: end of the loop over right-hand-side columns */
+	}
+
+    } else {
+
+/*        Solve A**T*X = B. */
+
+	i__1 = *nrhs;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Solve U**T*X = B, overwriting B with X. */
+
+	    i__2 = *kl + *ku; /* bandwidth passed to dtbsv_ for U */
+	    dtbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset],
+		     ldab, &b[i__ * b_dim1 + 1], &c__1);
+/* L30: end of the loop over right-hand-side columns */
+	}
+
+/*        Solve L**T*X = B, overwriting B with X. */
+
+	if (lnoti) {
+	    for (j = *n - 1; j >= 1; --j) { /* columns in reverse order */
+/* Computing MIN: lm = min(KL, N-j), the number of multipliers in column j */
+		i__1 = *kl, i__2 = *n - j;
+		lm = f2cmin(i__1,i__2);
+		dgemv_("Transpose", &lm, nrhs, &c_b7, &b[j + 1 + b_dim1], ldb,
+			 &ab[kd + 1 + j * ab_dim1], &c__1, &c_b23, &b[j + 
+			b_dim1], ldb);
+		l = ipiv[j];
+		if (l != j) { /* interchange applied after the update, mirroring the A*X branch */
+		    dswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb);
+		}
+/* L40: end of the reverse column loop */
+	    }
+	}
+    }
+    return 0;
+
+/*     End of DGBTRS */
+
+} /* dgbtrs_ */
+
diff --git a/lapack-netlib/SRC/dgebak.c b/lapack-netlib/SRC/dgebak.c
new file mode 100644
index 000000000..054be75e5
--- /dev/null
+++ b/lapack-netlib/SRC/dgebak.c
@@ -0,0 +1,675 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* NOTE: overrides the <stdlib.h> abs(); x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* the selected argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* the selected argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Macro forms of f2c runtime helpers; the c_/z_/d_/r_/i_ forms take pointer arguments. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): ccos, not ccosf, unlike c_sin/c_exp */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf(R) so that the struct-typed R can receive a _Complex float. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* NOTE(review): conj (double), result narrowed on store */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* NOTE(review): cimag (double) applied to a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* rounds halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| with the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is not wrapped in Cd() as A is -- confirm B's type */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* stops at a NUL in B; A is not blank-padded */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a braced expression with no ';' -- confirm intended use */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): braced expansion as above; w is ignored */
+/* mymaxloc always forwards to dmaxloc_ (the double version); smaxloc_ is never selected here. */
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): braced expansion as above */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float result = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+/* x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double result = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+/* Complex x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float result = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+/* Complex x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex double result = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+/* Integer power x**n; for n < 0 the result is the truncated quotient 1/(x**|n|). */
+static integer pow_ii(integer x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	integer result = 1;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* |x| >= 2 truncates to 0.  x == 0 keeps the original's 1/x, i.e. a
+		   division by zero; NOTE(review): that is undefined behaviour in C. */
+		return x == 0 ? 1/x : 0;
+	}
+	for (; u != 0; u >>= 1) {	/* x == -1 with n < 0 also lands here: (-1)**|n| */
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+/* Position, counted from 1 at w(s), of the first largest entry in w(s..e); n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, idx;	/* 1-based indices: w(k) lives at w[k-1] */
+	for (idx = s + 1; idx <= e; ++idx)
+		if (w[idx-1] > w[best-1]) best = idx;
+	return best - s + 1;
+}
+/* Position, counted from 1 at w(s), of the first largest entry in w(s..e); n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, idx;	/* 1-based indices: w(k) lives at w[k-1] */
+	for (idx = s + 1; idx <= e; ++idx)
+		if (w[idx-1] > w[best-1]) best = idx;
+	return best - s + 1;
+}
+/* *z = sum over k of conj(x_k) * y_k for n elements with strides incx and incy. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a negatively strided vector backwards from its
+	   last element, i.e. it starts at offset (1-n)*inc.  Starting at 0 with
+	   a negative stride, as before, read memory in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over k of conj(x_k) * y_k for n elements with strides incx and incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a negatively strided vector backwards from its
+	   last element, i.e. it starts at offset (1-n)*inc.  Starting at 0 with
+	   a negative stride, as before, read memory in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+/* *z = sum over k of x_k * y_k (no conjugation) for n elements with strides incx and incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a negatively strided vector backwards from its
+	   last element, i.e. it starts at offset (1-n)*inc.  Starting at 0 with
+	   a negative stride, as before, read memory in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over k of x_k * y_k (no conjugation) for n elements with strides incx and incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a negatively strided vector backwards from its
+	   last element, i.e. it starts at offset (1-n)*inc.  Starting at 0 with
+	   a negative stride, as before, read memory in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEBAK */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEBAK + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebak.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebak.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebak.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, */
+/*                          INFO ) */
+
+/*       CHARACTER          JOB, SIDE */
+/*       INTEGER            IHI, ILO, INFO, LDV, M, N */
+/*       DOUBLE PRECISION   SCALE( * ), V( LDV, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEBAK forms the right or left eigenvectors of a real general matrix */
+/* > by backward transformation on the computed eigenvectors of the */
+/* > balanced matrix output by DGEBAL. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          Specifies the type of backward transformation required: */
+/* >          = 'N': do nothing, return immediately; */
+/* >          = 'P': do backward transformation for permutation only; */
+/* >          = 'S': do backward transformation for scaling only; */
+/* >          = 'B': do backward transformations for both permutation and */
+/* >                 scaling. */
+/* >          JOB must be the same as the argument JOB supplied to DGEBAL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'R':  V contains right eigenvectors; */
+/* >          = 'L':  V contains left eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of rows of the matrix V.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          The integers ILO and IHI determined by DGEBAL. */
+/* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SCALE */
+/* > \verbatim */
+/* >          SCALE is DOUBLE PRECISION array, dimension (N) */
+/* >          Details of the permutation and scaling factors, as returned */
+/* >          by DGEBAL. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of columns of the matrix V.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V */
+/* > \verbatim */
+/* >          V is DOUBLE PRECISION array, dimension (LDV,M) */
+/* >          On entry, the matrix of right or left eigenvectors to be */
+/* >          transformed, as returned by DHSEIN or DTREVC. */
+/* >          On exit, V is overwritten by the transformed eigenvectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. LDV >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, 
+	integer *ihi, doublereal *scale, integer *m, doublereal *v, integer *
+	ldv, integer *info)
+{
+    /* f2c-generated locals: leading dimension of V, 1-based offset, loop bound */
+    integer v_dim1, v_offset, i__1;
+
+    /* Local variables */
+    integer i__, k;
+    doublereal s;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    logical leftv;
+    integer ii;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical rightv;
+/*  Undoes, on the M columns of V and as selected by JOB, the balancing held */
+/*  in SCALE: rows ILO..IHI are rescaled, rows outside them are swapped back. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Decode and Test the input parameters */
+
+    /* Parameter adjustments: rebase scale and v so 1-based indices work */
+    --scale;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+
+    /* Function Body: the first invalid argument sets INFO to minus its position */
+    rightv = lsame_(side, "R");
+    leftv = lsame_(side, "L");
+
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (! rightv && ! leftv) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*ldv < f2cmax(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info); /* xerbla_ receives the positive argument position */
+	xerbla_("DGEBAK", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*m == 0) {
+	return 0;
+    }
+    if (lsame_(job, "N")) {
+	return 0;
+    }
+
+    if (*ilo == *ihi) { /* the scaling step is skipped entirely in this case */
+	goto L30;
+    }
+
+/*     Backward balance */
+
+    if (lsame_(job, "S") || lsame_(job, "B")) {
+
+	if (rightv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = scale[i__]; /* right eigenvectors: row i is multiplied by SCALE(i) */
+		dscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L10: end of the row loop for right eigenvectors */
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = 1. / scale[i__]; /* left eigenvectors: row i is multiplied by 1/SCALE(i) */
+		dscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L20: end of the row loop for left eigenvectors */
+	    }
+	}
+
+    }
+
+/*     Backward permutation */
+
+/*     For  I = ILO-1 step -1 until 1, */
+/*              IHI+1 step 1 until N do -- */
+
+L30:
+    if (lsame_(job, "P") || lsame_(job, "B")) {
+	if (rightv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) { /* rows ILO..IHI are not swapped */
+		    goto L40;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii; /* visits ILO-1 down to 1 as ii runs 1..ILO-1 */
+		}
+		k = (integer) scale[i__]; /* here SCALE(i) is read as a row index */
+		if (k == i__) {
+		    goto L40;
+		}
+		dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L40:
+		;
+	    }
+	}
+
+	if (leftv) { /* same swap loop as the rightv branch above */
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) { /* rows ILO..IHI are not swapped */
+		    goto L50;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii; /* visits ILO-1 down to 1 as ii runs 1..ILO-1 */
+		}
+		k = (integer) scale[i__]; /* here SCALE(i) is read as a row index */
+		if (k == i__) {
+		    goto L50;
+		}
+		dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L50:
+		;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DGEBAK */
+
+} /* dgebak_ */
+
diff --git a/lapack-netlib/SRC/dgebal.c b/lapack-netlib/SRC/dgebal.c
new file mode 100644
index 000000000..107f306d8
--- /dev/null
+++ b/lapack-netlib/SRC/dgebal.c
@@ -0,0 +1,840 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* NOTE: overrides the <stdlib.h> abs(); x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* the selected argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* the selected argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Macro forms of f2c runtime helpers; the c_/z_/d_/r_/i_ forms take pointer arguments. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): ccos, not ccosf, unlike c_sin/c_exp */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf(R) so that the struct-typed R can receive a _Complex float. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* NOTE(review): conj (double), result narrowed on store */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* NOTE(review): cimag (double) applied to a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* rounds halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| with the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is not wrapped in Cd() as A is -- confirm B's type */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* stops at a NUL in B; A is not blank-padded */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a braced expression with no ';' -- confirm intended use */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): braced expansion as above; w is ignored */
+/* mymaxloc always forwards to dmaxloc_ (the double version); smaxloc_ is never selected here. */
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): braced expansion as above */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float result = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+/* x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double result = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+/* Complex x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* |n| is formed in unsigned arithmetic: -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float result = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns the complex value x raised to the integer power n (double precision) */
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with the reciprocal base; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) { /* binary exponentiation over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break; /* all bits consumed */
+		}
+	}
+	return pow; /* n == 0 yields 1 for every x */
+}
+static integer pow_ii(integer x, integer n) { /* integer power x ** n, with the result kept in integer arithmetic */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* zero exponent, or base 1: result is 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* negative exponent: 0 unless x == 0, where 1/x divides by zero */
+		else n = -n; /* x == -1: only the parity of n matters, so use the positive exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* run the loop only when pow was not already decided above */
+		u = n;
+		for(pow = 1; ; ) { /* binary exponentiation over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the largest of w[s-1..e-1], counted from 1 at index s; n is not used */
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds into w */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* single-precision twin of dmaxloc_: position of the largest of w[s-1..e-1], counted from 1 at index s; n is not used */
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds into w */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum over i of conj(x[i*incx]) * y[i*incy], single-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i), strided */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum over i of conj(x[i*incx]) * y[i*incy], double-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i), strided */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum over i of x[i*incx] * y[i*incy] without conjugation, single-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation), strided */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum over i of x[i*incx] * y[i*incy] without conjugation, double-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation), strided */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1; /* constant 1 held in memory so dgebal_ can pass &c__1 as a unit stride */
+
+/* > \brief \b DGEBAL */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEBAL + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebal.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebal.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebal.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) */
+
+/*       CHARACTER          JOB */
+/*       INTEGER            IHI, ILO, INFO, LDA, N */
+/*       DOUBLE PRECISION   A( LDA, * ), SCALE( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEBAL balances a general real matrix A.  This involves, first, */
+/* > permuting A by a similarity transformation to isolate eigenvalues */
+/* > in the first 1 to ILO-1 and last IHI+1 to N elements on the */
+/* > diagonal; and second, applying a diagonal similarity transformation */
+/* > to rows and columns ILO to IHI to make the rows and columns as */
+/* > close in norm as possible.  Both steps are optional. */
+/* > */
+/* > Balancing may reduce the 1-norm of the matrix, and improve the */
+/* > accuracy of the computed eigenvalues and/or eigenvectors. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOB */
+/* > \verbatim */
+/* >          JOB is CHARACTER*1 */
+/* >          Specifies the operations to be performed on A: */
+/* >          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */
+/* >                  for i = 1,...,N; */
+/* >          = 'P':  permute only; */
+/* >          = 'S':  scale only; */
+/* >          = 'B':  both permute and scale. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the input matrix A. */
+/* >          On exit,  A is overwritten by the balanced matrix. */
+/* >          If JOB = 'N', A is not referenced. */
+/* >          See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > \param[out] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          ILO and IHI are set to integers such that on exit */
+/* >          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or i = IHI+1,...,N. */
+/* >          If JOB = 'N' or 'S', ILO = 1 and IHI = N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SCALE */
+/* > \verbatim */
+/* >          SCALE is DOUBLE PRECISION array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied to */
+/* >          A.  If P(j) is the index of the row and column interchanged */
+/* >          with row and column j and D(j) is the scaling factor */
+/* >          applied to row and column j, then */
+/* >          SCALE(j) = P(j)    for j = 1,...,ILO-1 */
+/* >                   = D(j)    for j = ILO,...,IHI */
+/* >                   = P(j)    for j = IHI+1,...,N. */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value; INFO = -3 is also set if a NaN is detected while scaling. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The permutations consist of row and column interchanges which put */
+/* >  the matrix in the form */
+/* > */
+/* >             ( T1   X   Y  ) */
+/* >     P A P = (  0   B   Z  ) */
+/* >             (  0   0   T2 ) */
+/* > */
+/* >  where T1 and T2 are upper triangular matrices whose eigenvalues lie */
+/* >  along the diagonal.  The column indices ILO and IHI mark the starting */
+/* >  and ending columns of the submatrix B. Balancing consists of applying */
+/* >  a diagonal similarity transformation inv(D) * B * D to make the */
+/* >  1-norms of each row of B and its corresponding column nearly equal. */
+/* >  The output matrix is */
+/* > */
+/* >     ( T1     X*D          Y    ) */
+/* >     (  0  inv(D)*B*D  inv(D)*Z ). */
+/* >     (  0      0           T2   ) */
+/* > */
+/* >  Information about the permutations P and the diagonal matrix D is */
+/* >  returned in the vector SCALE. */
+/* > */
+/* >  This subroutine is based on the EISPACK routine BALANC. */
+/* > */
+/* >  Modified by Tzu-Yi Chen, Computer Science Division, University of */
+/* >    California at Berkeley, USA */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer *
+	lda, integer *ilo, integer *ihi, doublereal *scale, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    integer iexc; /* selects where to resume after the shared exchange code at L20 */
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    doublereal c__, f, g;
+    integer i__, j, k, l, m; /* k..l is the index range still being worked on; it becomes ILO..IHI */
+    doublereal r__, s;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    doublereal sfmin1, sfmin2, sfmax1, sfmax2, ca, ra;
+    extern doublereal dlamch_(char *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern logical disnan_(doublereal *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical noconv; /* set when a sweep rescaled at least one row/column */
+    integer ica, ira;
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset; /* lets a[i + j * a_dim1] be addressed with 1-based i and j */
+    --scale; /* likewise makes scale[1] the first element */
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
+	    && ! lsame_(job, "B")) {
+	*info = -1; /* JOB is not one of N, P, S, B */
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEBAL", &i__1, (ftnlen)6);
+	return 0; /* ILO, IHI and SCALE are left untouched on an argument error */
+    }
+
+    k = 1;
+    l = *n;
+
+    if (*n == 0) {
+	goto L210; /* empty matrix: returns ILO = 1, IHI = 0 */
+    }
+
+    if (lsame_(job, "N")) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scale[i__] = 1.;
+/* L10: */
+	}
+	goto L210;
+    }
+
+    if (lsame_(job, "S")) {
+	goto L120; /* scaling only: skip the permutation search */
+    }
+
+/*     Permutation to isolate eigenvalues if possible */
+
+    goto L50;
+
+/*     Row and column exchange: swap index j with index m and record j in scale[m]. */
+
+L20:
+    scale[m] = (doublereal) j;
+    if (j == m) {
+	goto L30; /* already in place: nothing to swap */
+    }
+
+    dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); /* columns j and m, rows 1..l */
+    i__1 = *n - k + 1;
+    dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); /* rows j and m, columns k..n */
+
+L30:
+    switch (iexc) { /* return to whichever search requested the exchange */
+	case 1:  goto L40;
+	case 2:  goto L80;
+    }
+
+/*     Search for rows isolating an eigenvalue and push them down. */
+
+L40:
+    if (l == 1) {
+	goto L210; /* the active block has shrunk to a single element */
+    }
+    --l;
+
+L50:
+    for (j = l; j >= 1; --j) {
+
+	i__1 = l;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (i__ == j) {
+		goto L60; /* ignore the diagonal entry */
+	    }
+	    if (a[j + i__ * a_dim1] != 0.) {
+		goto L70; /* row j has an off-diagonal nonzero: try the next row */
+	    }
+L60:
+	    ;
+	}
+
+	m = l; /* row j is zero off the diagonal in columns 1..l: move it to position l */
+	iexc = 1;
+	goto L20;
+L70:
+	;
+    }
+
+    goto L90;
+
+/*     Search for columns isolating an eigenvalue and push them left. */
+
+L80:
+    ++k;
+
+L90:
+    i__1 = l;
+    for (j = k; j <= i__1; ++j) {
+
+	i__2 = l;
+	for (i__ = k; i__ <= i__2; ++i__) {
+	    if (i__ == j) {
+		goto L100; /* ignore the diagonal entry */
+	    }
+	    if (a[i__ + j * a_dim1] != 0.) {
+		goto L110; /* column j has an off-diagonal nonzero: try the next column */
+	    }
+L100:
+	    ;
+	}
+
+	m = k; /* column j is zero off the diagonal in rows k..l: move it to position k */
+	iexc = 2;
+	goto L20;
+L110:
+	;
+    }
+
+L120:
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	scale[i__] = 1.; /* start with unit scaling factors for rows/columns k..l */
+/* L130: */
+    }
+
+    if (lsame_(job, "P")) {
+	goto L210; /* permutation only: no scaling requested */
+    }
+
+/*     Balance the submatrix in rows K to L. */
+
+/*     Iterative loop for norm reduction */
+
+    sfmin1 = dlamch_("S") / dlamch_("P");
+    sfmax1 = 1. / sfmin1;
+    sfmin2 = sfmin1 * 2.;
+    sfmax2 = 1. / sfmin2;
+
+L140:
+    noconv = FALSE_; /* each sweep starts by assuming nothing needs rescaling */
+
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+
+	i__2 = l - k + 1;
+	c__ = dnrm2_(&i__2, &a[k + i__ * a_dim1], &c__1); /* dnrm2_ of column i, rows k..l */
+	i__2 = l - k + 1;
+	r__ = dnrm2_(&i__2, &a[i__ + k * a_dim1], lda); /* dnrm2_ of row i, columns k..l */
+	ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
+	ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); /* magnitude of the entry idamax_ picked in column i, rows 1..l */
+	i__2 = *n - k + 1;
+	ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda);
+	ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); /* magnitude of the entry idamax_ picked in row i, columns k..n */
+
+/*        Guard against zero C or R due to underflow. */
+
+	if (c__ == 0. || r__ == 0.) {
+	    goto L200;
+	}
+	g = r__ / 2.;
+	f = 1.;
+	s = c__ + r__; /* remembered to judge later whether rescaling is worthwhile */
+L160:
+/* Computing MAX */
+	d__1 = f2cmax(f,c__);
+/* Computing MIN */
+	d__2 = f2cmin(r__,g);
+	if (c__ >= g || f2cmax(d__1,ca) >= sfmax2 || f2cmin(d__2,ra) <= sfmin2) {
+	    goto L170; /* stop doubling: c has caught up with g, or a safe-range limit was reached */
+	}
+	d__1 = c__ + f + ca + r__ + g + ra;
+	if (disnan_(&d__1)) {
+
+/*           Exit if NaN to avoid infinite loop */
+
+	    *info = -3; /* reported against argument 3, the matrix A */
+	    i__2 = -(*info);
+	    xerbla_("DGEBAL", &i__2, (ftnlen)6);
+	    return 0; /* note: ILO and IHI are not assigned on this path */
+	}
+	f *= 2.; /* scale the column up and the row down by a factor of 2 */
+	c__ *= 2.;
+	ca *= 2.;
+	r__ /= 2.;
+	g /= 2.;
+	ra /= 2.;
+	goto L160;
+
+L170:
+	g = c__ / 2.;
+L180:
+/* Computing MIN */
+	d__1 = f2cmin(f,c__), d__1 = f2cmin(d__1,g);
+	if (g < r__ || f2cmax(r__,ra) >= sfmax2 || f2cmin(d__1,ca) <= sfmin2) {
+	    goto L190; /* stop halving: g dropped below r, or a safe-range limit was reached */
+	}
+	f /= 2.; /* scale the column down and the row up by a factor of 2 */
+	c__ /= 2.;
+	g /= 2.;
+	ca /= 2.;
+	r__ *= 2.;
+	ra *= 2.;
+	goto L180;
+
+/*        Now balance. */
+
+L190:
+	if (c__ + r__ >= s * .95) {
+	    goto L200; /* less than a 5% reduction of c + r: leave this row/column alone */
+	}
+	if (f < 1. && scale[i__] < 1.) {
+	    if (f * scale[i__] <= sfmin1) {
+		goto L200; /* accumulated factor would fall to sfmin1 or below */
+	    }
+	}
+	if (f > 1. && scale[i__] > 1.) {
+	    if (scale[i__] >= sfmax1 / f) {
+		goto L200; /* accumulated factor would reach sfmax1 or above */
+	    }
+	}
+	g = 1. / f;
+	scale[i__] *= f; /* accumulate the factor applied to row/column i */
+	noconv = TRUE_; /* something changed, so another sweep is required */
+
+	i__2 = *n - k + 1;
+	dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); /* row i, columns k..n, multiplied by 1/f */
+	dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); /* column i, rows 1..l, multiplied by f */
+
+L200:
+	;
+    }
+
+    if (noconv) {
+	goto L140; /* repeat sweeps until one completes without rescaling */
+    }
+
+L210:
+    *ilo = k;
+    *ihi = l;
+
+    return 0;
+
+/*     End of DGEBAL */
+
+} /* dgebal_ */
+
diff --git a/lapack-netlib/SRC/dgebd2.c b/lapack-netlib/SRC/dgebd2.c
new file mode 100644
index 000000000..2cd70809c
--- /dev/null
+++ b/lapack-netlib/SRC/dgebd2.c
@@ -0,0 +1,745 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* C type used wherever the translated routines take an INTEGER */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal; /* C type used wherever the translated routines take DOUBLE PRECISION */
+typedef struct { real r, i; } complex; /* single-precision complex stored as (real, imaginary) */
+typedef struct { doublereal r, i; } doublecomplex; /* double-precision complex stored as (real, imaginary) */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* loads a complex struct as a C99 _Complex float value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* loads a doublecomplex struct as a C99 _Complex double value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* views the struct as _Complex float storage (relies on matching layout) */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* views the struct as _Complex double storage (relies on matching layout) */
+#define pCf(z) (*_pCf(z)) /* assignable _Complex float lvalue for *z */
+#define pCd(z) (*_pCd(z)) /* assignable _Complex double lvalue for *z */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain extern unless already defined */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen; /* type of the length argument passed with character arguments, e.g. to xerbla_ */
+typedef int ftnint;
+
+/* cilist: control block for external read and write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: control block for internal read and write (the unit is a character buffer) */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: control block for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: control block for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: control block for rewind, backspace and endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: control block for inquire; most string results come as a pointer plus a length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/* parameters in the standard's order */
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one slot per scalar type declared above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist { /* a named group of nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* type-generic absolute value; evaluates x more than once */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* smaller of a and b; arguments are evaluated more than once */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* larger of a and b; arguments are evaluated more than once */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1) /* value (0 or 1) of bit b of a */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b))) /* a with bit b cleared */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b))) /* a with bit b set */
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z))) /* modulus of a single-precision complex passed by pointer */
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* evaluated with the double-precision ccos, then stored as float complex */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} /* *c = *a / *b, single-precision complex */
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} /* *c = *a / *b, double-precision complex */
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x))) /* the d_* helpers below take their operands by pointer */
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } /* *R = complex conjugate of *Z */
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* single-precision variant; goes through the double-precision conj */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) /* positive difference: a - b when a > b, otherwise 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z))) /* imaginary part of a doublecomplex passed by pointer */
+#define r_imag(z) (cimag(Cf(z))) /* imaginary part of a complex passed by pointer */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncation toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncation toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* base-10 logarithm computed as log10(e) * ln(x) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* base-10 logarithm computed as log10(e) * ln(x) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* round to nearest, halves away from zero; takes a value, not a pointer */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| carrying the sign of b; b == 0 counts as non-negative */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x))) /* nearest integer of a value passed by pointer */
+#define i_len(s, n) (n) /* the string length is simply the length argument */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) /* double ** double */
+#define pow_si(B,E) spow_ui(*(B),*(E)) /* float ** integer */
+#define pow_ri(B,E) spow_ui(*(B),*(E)) /* float ** integer (same expansion as pow_si) */
+#define pow_di(B,E) dpow_ui(*(B),*(E)) /* double ** integer */
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} /* stores (*a) ** (*b) into *p for a double complex base */
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} /* single-precision complex counterpart of pow_zi */
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* *R = (*A) ** (*B); the exponent is a doublecomplex pointer like A, so it is converted with Cd() too */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; } /* concatenates the *np strings rpp[i] (lengths rnp[i]) into lpp, truncating at llp characters and blank-filling the remainder */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) characters and stops early at a NUL in B; A is neither padded nor terminated */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored; always exits with status 1 */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored; exits with status 0 */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* version tag string; not referenced by the code visible in this file */
+#define z_abs(z) (cabs(Cd(z))) /* modulus of a doublecomplex passed by pointer */
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} /* *R = exp(*Z) */
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} /* *R = sqrt(*Z) */
+#define myexit_() break; /* expands to a break statement, so it is only meaningful inside a loop or switch */
+#define mycycle() continue; /* expands to a continue statement */
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a braced expression, usable only where a brace-enclosed initializer is valid - confirm intended use */
+#define myhuge(w) {HUGE_VAL} /* argument is ignored; same braced-expression caveat as myceiling */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): always calls the double-precision dmaxloc_, never smaxloc_ - confirm */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...); /* C++ spelling: pointer to a function returning logical, any arguments */
+#else
+typedef logical (*L_fp)(); /* C spelling: unprototyped function returning logical */
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x raised to the integer power n (single precision) */
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with the reciprocal base; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) { /* binary exponentiation over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break; /* all bits consumed */
+		}
+	}
+	return pow; /* n == 0 yields 1 for every x */
+}
+static double dpow_ui(double x, integer n) { /* returns x raised to the integer power n (double precision) */
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with the reciprocal base; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) { /* binary exponentiation over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break; /* all bits consumed */
+		}
+	}
+	return pow; /* n == 0 yields 1 for every x */
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns the complex value x raised to the integer power n (single precision) */
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with the reciprocal base; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) { /* binary exponentiation over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break; /* all bits consumed */
+		}
+	}
+	return pow; /* n == 0 yields 1 for every x */
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns the complex value x raised to the integer power n (double precision) */
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with the reciprocal base; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) { /* binary exponentiation over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square the base */
+			else break; /* all bits consumed */
+		}
+	}
+	return pow; /* n == 0 yields 1 for every x */
+}
+static integer pow_ii(integer x, integer n) { /* integer power x ** n, with the result kept in integer arithmetic */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* zero exponent, or base 1: result is 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* negative exponent: 0 unless x == 0, where 1/x divides by zero */
+		else n = -n; /* x == -1: only the parity of n matters, so use the positive exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* run the loop only when pow was not already decided above */
+		u = n;
+		for(pow = 1; ; ) { /* binary exponentiation over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the largest of w[s-1..e-1], counted from 1 at index s; n is not used */
+{
+	double m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds into w */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* single-precision twin of dmaxloc_: position of the largest of w[s-1..e-1], counted from 1 at index s; n is not used */
+{
+	float m; integer i, mi;
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds into w */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum over i of conj(x[i*incx]) * y[i*incy], single-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i), strided */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum over i of conj(x[i*incx]) * y[i*incy], double-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i), strided */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum over i of x[i*incx] * y[i*incy] without conjugation, single-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation), strided */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum over i of x[i*incx] * y[i*incy] without conjugation, double-precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* running sum; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* unit-stride case */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* NOTE(review): negative increments index backwards from x[0]/y[0] - confirm callers never pass them */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)   (no conjugation), strided */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1; /* constant 1 held in memory; presumably passed by address to by-reference callees - uses lie beyond this excerpt */
+
+/* > \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEBD2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebd2.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebd2.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebd2.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ), */
+/*      $                   TAUQ( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEBD2 reduces a real general m by n matrix A to upper or lower */
+/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
+/* > */
+/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows in the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns in the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the m by n general matrix to be reduced. */
+/* >          On exit, */
+/* >          if m >= n, the diagonal and the first superdiagonal are */
+/* >            overwritten with the upper bidiagonal matrix B; the */
+/* >            elements below the diagonal, with the array TAUQ, represent */
+/* >            the orthogonal matrix Q as a product of elementary */
+/* >            reflectors, and the elements above the first superdiagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors; */
+/* >          if m < n, the diagonal and the first subdiagonal are */
+/* >            overwritten with the lower bidiagonal matrix B; the */
+/* >            elements below the first subdiagonal, with the array TAUQ, */
+/* >            represent the orthogonal matrix Q as a product of */
+/* >            elementary reflectors, and the elements above the diagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors. */
+/* >          See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] D */
+/* > \verbatim */
+/* >          D is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The diagonal elements of the bidiagonal matrix B: */
+/* >          D(i) = A(i,i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] E */
+/* > \verbatim */
+/* >          E is DOUBLE PRECISION array, dimension (min(M,N)-1) */
+/* >          The off-diagonal elements of the bidiagonal matrix B: */
+/* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
+/* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUQ */
+/* > \verbatim */
+/* >          TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Q. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUP */
+/* > \verbatim */
+/* >          TAUP is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix P. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (max(M,N)) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit. */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrices Q and P are represented as products of elementary */
+/* >  reflectors: */
+/* > */
+/* >  If m >= n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
+/* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  If m < n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
+/* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  The contents of A on exit are illustrated by the following examples: */
+/* > */
+/* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
+/* > */
+/* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
+/* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
+/* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
+/* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
+/* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
+/* >    (  v1  v2  v3  v4  v5 ) */
+/* > */
+/* >  where d and e denote diagonal and off-diagonal elements of B, vi */
+/* >  denotes an element of the vector defining H(i), and ui an element of */
+/* >  the vector defining G(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *
+	taup, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *), dlarfg_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+/*  Reduces the M-by-N matrix A to bidiagonal form with unblocked code: */
+/*  upper bidiagonal if M >= N, lower bidiagonal otherwise. */
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+/*  On return D holds the diagonal, E the off-diagonal, TAUQ/TAUP the */
+/*  reflector scalars; always returns 0 and reports bad arguments via INFO. */
+/*     Test the input parameters */
+
+    /* Parameter adjustments: rebase pointers so 1-based Fortran indices work */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("DGEBD2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = *m - i__ + 1;
+/* Computing MIN: clamp row i+1 to m so the address stays inside column i */
+	    i__3 = i__ + 1;
+	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * 
+		    a_dim1], &c__1, &tauq[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.;
+/*           A(i,i) is set to 1 above so that A(i:m,i) holds the whole vector v */
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    if (i__ < *n) {
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__;
+		dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &
+			tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]
+			);
+	    }
+	    a[i__ + i__ * a_dim1] = d__[i__];
+/*           (diagonal entry restored from D(i)) */
+	    if (i__ < *n) {
+
+/*              Generate elementary reflector G(i) to annihilate */
+/*              A(i,i+2:n) */
+
+		i__2 = *n - i__;
+/* Computing MIN: clamp column i+2 to n so the address stays inside row i */
+		i__3 = i__ + 2;
+		dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + f2cmin(
+			i__3,*n) * a_dim1], lda, &taup[i__]);
+		e[i__] = a[i__ + (i__ + 1) * a_dim1];
+		a[i__ + (i__ + 1) * a_dim1] = 1.;
+
+/*              Apply G(i) to A(i+1:m,i+1:n) from the right */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], 
+			lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], 
+			lda, &work[1]);
+		a[i__ + (i__ + 1) * a_dim1] = e[i__];
+	    } else {
+		taup[i__] = 0.; /* i = n: no G(i) is generated for the last column */
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector G(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+/* Computing MIN: clamp column i+1 to n so the address stays inside row i */
+	    i__3 = i__ + 1;
+	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * 
+		    a_dim1], lda, &taup[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.;
+/*           A(i,i) is set to 1 above so that A(i,i:n) holds the whole vector u */
+/*           Apply G(i) to A(i+1:m,i:n) from the right */
+
+	    if (i__ < *m) {
+		i__2 = *m - i__;
+		i__3 = *n - i__ + 1;
+		dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &
+			taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    }
+	    a[i__ + i__ * a_dim1] = d__[i__];
+/*           (diagonal entry restored from D(i)) */
+	    if (i__ < *m) {
+
+/*              Generate elementary reflector H(i) to annihilate */
+/*              A(i+2:m,i) */
+
+		i__2 = *m - i__;
+/* Computing MIN: clamp row i+2 to m so the address stays inside column i */
+		i__3 = i__ + 2;
+		dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*m) + 
+			i__ * a_dim1], &c__1, &tauq[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.;
+
+/*              Apply H(i) to A(i+1:m,i+1:n) from the left */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], 
+			lda, &work[1]);
+		a[i__ + 1 + i__ * a_dim1] = e[i__];
+	    } else {
+		tauq[i__] = 0.; /* i = m: no H(i) is generated for the last row */
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of DGEBD2 */
+
+} /* dgebd2_ */
+
diff --git a/lapack-netlib/SRC/dgebrd.c b/lapack-netlib/SRC/dgebrd.c
new file mode 100644
index 000000000..72a91940c
--- /dev/null
+++ b/lapack-netlib/SRC/dgebrd.c
@@ -0,0 +1,784 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* f2c complex struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* same, double precision */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* pointer cast: view the struct as a C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same, double precision */
+#define pCf(z) (*_pCf(z)) /* lvalue form of _pCf, usable as an assignment target */
+#define pCd(z) (*_pCd(z)) /* lvalue form of _pCd, usable as an assignment target */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Value-producing helpers are parenthesised, not braced, so they can appear in assignments and expressions. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* x**n by binary exponentiation; a negative exponent inverts x first. */
+	/* |n| is formed in unsigned arithmetic because -n overflows (undefined */
+	/* behaviour) when n is the most negative integer. */
+	float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n < 0) u = 0UL - u, x = 1/x;
+	while(u != 0) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	/* x**n by binary exponentiation; a negative exponent inverts x first. */
+	/* |n| is formed in unsigned arithmetic because -n overflows (undefined */
+	/* behaviour) when n is the most negative integer. */
+	double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n < 0) u = 0UL - u, x = 1/x;
+	while(u != 0) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* x**n by binary exponentiation; a negative exponent inverts x first. */
+	/* |n| is formed in unsigned arithmetic because -n overflows (undefined */
+	/* behaviour) when n is the most negative integer. */
+	_Complex float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n < 0) u = 0UL - u, x = 1/x;
+	while(u != 0) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* x**n by binary exponentiation; a negative exponent inverts x first. */
+	/* |n| is formed in unsigned arithmetic because -n overflows (undefined */
+	/* behaviour) when n is the most negative integer. */
+	_Complex double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n < 0) u = 0UL - u, x = 1/x;
+	while(u != 0) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; non-positive exponents follow the libf2c rules below. */
+	integer pow = 1; unsigned long int u = (unsigned long int)n;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* |x| > 1 truncates to 0; 0**negative keeps the historical libf2c division by zero */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* x == -1: take |n| in unsigned arithmetic, since -n overflows for the most negative n */
+		u = 0UL - u;
+	}
+	for( ; ; ) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	/* Position, counted from 1 at index s, of the first largest entry of w(s:e); n is not used. */
+	double top = w[s-1]; integer pos = s, k;
+	for (k = s + 1; k <= e; ++k) if (w[k-1] > top) { top = w[k-1]; pos = k; }
+	return pos - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	/* Position, counted from 1 at index s, of the first largest entry of w(s:e); n is not used. */
+	float top = w[s-1]; integer pos = s, k;
+	for (k = s + 1; k <= e; ++k) if (w[k-1] > top) { top = w[k-1]; pos = k; }
+	return pos - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z = sum over i of conjg(x(i)) * y(i), for n single precision complex elements. */
+	/* As in the reference BLAS, a negative increment walks that vector backwards, */
+	/* so its start offset is moved to the far end; indexing with i*inc alone would */
+	/* read in front of the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z = sum over i of conjg(x(i)) * y(i), for n double precision complex elements. */
+	/* As in the reference BLAS, a negative increment walks that vector backwards, */
+	/* so its start offset is moved to the far end; indexing with i*inc alone would */
+	/* read in front of the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z = sum over i of x(i) * y(i) (no conjugation), for n single precision complex elements. */
+	/* As in the reference BLAS, a negative increment walks that vector backwards, */
+	/* so its start offset is moved to the far end; indexing with i*inc alone would */
+	/* read in front of the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z = sum over i of x(i) * y(i) (no conjugation), for n double precision complex elements. */
+	/* As in the reference BLAS, a negative increment walks that vector backwards, */
+	/* so its start offset is moved to the far end; indexing with i*inc alone would */
+	/* read in front of the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+static doublereal c_b21 = -1.;
+static doublereal c_b22 = 1.;
+
+/* > \brief \b DGEBRD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEBRD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgebrd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgebrd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgebrd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, */
+/*                          INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ), */
+/*      $                   TAUQ( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEBRD reduces a general real M-by-N matrix A to upper or lower */
+/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */
+/* > */
+/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows in the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns in the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N general matrix to be reduced. */
+/* >          On exit, */
+/* >          if m >= n, the diagonal and the first superdiagonal are */
+/* >            overwritten with the upper bidiagonal matrix B; the */
+/* >            elements below the diagonal, with the array TAUQ, represent */
+/* >            the orthogonal matrix Q as a product of elementary */
+/* >            reflectors, and the elements above the first superdiagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors; */
+/* >          if m < n, the diagonal and the first subdiagonal are */
+/* >            overwritten with the lower bidiagonal matrix B; the */
+/* >            elements below the first subdiagonal, with the array TAUQ, */
+/* >            represent the orthogonal matrix Q as a product of */
+/* >            elementary reflectors, and the elements above the diagonal, */
+/* >            with the array TAUP, represent the orthogonal matrix P as */
+/* >            a product of elementary reflectors. */
+/* >          See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] D */
+/* > \verbatim */
+/* >          D is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The diagonal elements of the bidiagonal matrix B: */
+/* >          D(i) = A(i,i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] E */
+/* > \verbatim */
+/* >          E is DOUBLE PRECISION array, dimension (min(M,N)-1) */
+/* >          The off-diagonal elements of the bidiagonal matrix B: */
+/* >          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */
+/* >          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUQ */
+/* > \verbatim */
+/* >          TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix Q. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAUP */
+/* > \verbatim */
+/* >          TAUP is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors which */
+/* >          represent the orthogonal matrix P. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The length of the array WORK.  LWORK >= max(1,M,N). */
+/* >          For optimum performance LWORK >= (M+N)*NB, where NB */
+/* >          is the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrices Q and P are represented as products of elementary */
+/* >  reflectors: */
+/* > */
+/* >  If m >= n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */
+/* >  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  If m < n, */
+/* > */
+/* >     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m) */
+/* > */
+/* >  Each H(i) and G(i) has the form: */
+/* > */
+/* >     H(i) = I - tauq * v * v**T  and G(i) = I - taup * u * u**T */
+/* > */
+/* >  where tauq and taup are real scalars, and v and u are real vectors; */
+/* >  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */
+/* >  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */
+/* >  tauq is stored in TAUQ(i) and taup in TAUP(i). */
+/* > */
+/* >  The contents of A on exit are illustrated by the following examples: */
+/* > */
+/* >  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */
+/* > */
+/* >    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 ) */
+/* >    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 ) */
+/* >    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 ) */
+/* >    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 ) */
+/* >    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 ) */
+/* >    (  v1  v2  v3  v4  v5 ) */
+/* > */
+/* >  where d and e denote diagonal and off-diagonal elements of B, vi */
+/* >  denotes an element of the vector defining H(i), and ui an element of */
+/* >  the vector defining G(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *
+	taup, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, j;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer nbmin, iinfo, minmn;
+    extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *);
+    integer nb;
+    extern /* Subroutine */ int dlabrd_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, doublereal *, integer *, doublereal *, integer *);
+    integer nx, ws;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwrkx, ldwrky, lwkopt;
+    logical lquery;
+/*  Blocked reduction of the M-by-N matrix A to bidiagonal form: panels of NB */
+/*  rows and columns go through DLABRD, the trailing part is updated by DGEMM, */
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+/*  and whatever remains is finished by the unblocked DGEBD2.  Always returns 0; */
+/*  argument errors are reported through INFO and XERBLA. */
+/*     Test the input parameters */
+
+    /* Parameter adjustments: rebase pointers so 1-based Fortran indices work */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MAX: block size NB from ILAENV(1), never below 1 */
+    i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = f2cmax(i__1,i__2);
+    lwkopt = (*m + *n) * nb;
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX: LWORK below max(1,M,N) is an error unless this is a query */
+	i__1 = f2cmax(1,*m);
+	if (*lwork < f2cmax(i__1,*n) && ! lquery) {
+	    *info = -10;
+	}
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("DGEBRD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    minmn = f2cmin(*m,*n);
+    if (minmn == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+/*     WS is the size written to WORK(1) on exit; LDWRKX/LDWRKY are the leading dimensions passed for the X and Y panels */
+    ws = f2cmax(*m,*n);
+    ldwrkx = *m;
+    ldwrky = *n;
+
+    if (nb > 1 && nb < minmn) {
+
+/*        Set the crossover point NX. */
+
+/* Computing MAX: the crossover point is never smaller than NB */
+	i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+
+/*        Determine when to switch from blocked to unblocked code. */
+
+	if (nx < minmn) {
+	    ws = (*m + *n) * nb;
+	    if (*lwork < ws) {
+
+/*              Not enough work space for the optimal NB, consider using */
+/*              a smaller block size. */
+
+		nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		if (*lwork >= (*m + *n) * nbmin) {
+		    nb = *lwork / (*m + *n);
+		} else {
+		    nb = 1;
+		    nx = minmn;
+		}
+	    }
+	}
+    } else {
+	nx = minmn;
+    }
+/*     Blocked loop over panels; it runs zero times when NX >= MINMN */
+    i__1 = minmn - nx;
+    i__2 = nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+
+/*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return */
+/*        the matrices X and Y which are needed to update the unreduced */
+/*        part of the matrix */
+
+	i__3 = *m - i__ + 1;
+	i__4 = *n - i__ + 1;
+	dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
+		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx 
+		* nb + 1], &ldwrky);
+
+/*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */
+/*        of the form  A := A - V*Y**T - X*U**T */
+
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ 
+		+ nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], &
+		ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, &
+		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
+		c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*        Copy diagonal and off-diagonal elements of B back into A */
+
+	if (*m >= *n) {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + (j + 1) * a_dim1] = e[j];
+/* L10: */
+	    }
+	} else {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + 1 + j * a_dim1] = e[j];
+/* L20: */
+	    }
+	}
+/* L30: */
+    }
+
+/*     Use unblocked code to reduce the remainder of the matrix */
+/*     (i__ is now the first row/column index the loop above did not reduce) */
+    i__2 = *m - i__ + 1;
+    i__1 = *n - i__ + 1;
+    dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
+	    tauq[i__], &taup[i__], &work[1], &iinfo);
+    work[1] = (doublereal) ws;
+    return 0;
+
+/*     End of DGEBRD */
+
+} /* dgebrd_ */
+
diff --git a/lapack-netlib/SRC/dgecon.c b/lapack-netlib/SRC/dgecon.c
new file mode 100644
index 000000000..fe47ed355
--- /dev/null
+++ b/lapack-netlib/SRC/dgecon.c
@@ -0,0 +1,658 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* the integer type used by the translated routines (plain C int) */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;	/* single-precision real */
+typedef double doublereal;	/* double-precision real */
+typedef struct { real r, i; } complex;	/* single-precision complex as a (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex as a (real, imaginary) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* view the struct storage as a C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* view the struct storage as a C99 complex */
+#define pCf(z) (*_pCf(z))	/* lvalue form of _pCf, used to store a complex result through z */
+#define pCd(z) (*_pCd(z))	/* lvalue form of _pCd, used to store a complex result through z */
+typedef int logical;	/* truth value; see TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)	/* value of a true `logical` */
+#define FALSE_ (0)	/* value of a false `logical` */
+
+/* Extern expands to `extern` unless it was already defined (for use with -E) */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: scalar types used by the descriptor structs that follow */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the length members of the descriptors (ofnmlen, infilen, ...) */
+typedef int ftnint;
+
+/* cilist: descriptor for external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: descriptor for internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: descriptor for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: descriptor for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: descriptor for rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: descriptor for inquire; each char* member is followed by its length member */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): the only length member typed ftnint rather than ftnlen (both are int) */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+/* Multitype overlays one member of each scalar type declared above. */
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+/* Vardesc describes one variable: a name, an address, a dims pointer and a type code. */
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+/* Namelist groups nvars Vardesc pointers under one name. */
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* type-generic; x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* arguments are evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* arguments are evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)	/* bit b of a, as 0 or 1 */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))	/* a with bit b cleared */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))	/* a with bit b set */
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): calls ccos (double) on a float complex; the other c_* macros use the f-suffixed functions */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* NOTE(review): conj (double) applied to a float complex */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference: a-b when a > b, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* NOTE(review): cimag (double) applied to a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* natural log scaled by a constant (presumably log10(e)) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* round to nearest, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| when b >= 0, else -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* the length is whatever the caller passed as n; s is ignored */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}	/* NOTE(review): B is dereferenced directly while A goes through Cd() - confirm intended */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }	/* concatenate *np pieces into lpp (capacity llp), then pad with blanks */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies up to min(C,D) chars, stops at NUL, adds no padding */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}	/* NOTE(review): expands to a braced expression with no semicolon */
+#define myhuge(w) {HUGE_VAL}	/* NOTE(review): expands to a braced expression with no semicolon */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* NOTE(review): braced, no semicolon; always dispatches to dmaxloc_ */
+
+/* procedure parameter types for -A and -C++ */
+/* L_fp: pointer to a function returning logical; the argument list is left unspecified. */
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	float result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex float result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) { /* integer x**n; for n < 0 the result is the truncated integer value */
+	integer result = 1; unsigned long int u;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* |x| >= 2 truncates to 0; x == 0 still performs the integer division 1/x, exactly as before. */
+		return x == 0 ? 1/x : 0;
+	}
+	/* Here n > 0, or x == -1 with n < 0, where only the parity of |n| matters. */
+	/* |n| is formed in unsigned arithmetic so the most negative n does not overflow on negation. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	for (;;) {
+		if (u & 01) result *= x;
+		if (u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, relative to s, of the first maximum among w[s-1..e-1]; n is never read. */
+	integer best = s, k;
+	double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, relative to s, of the first maximum among w[s-1..e-1]; n is never read. */
+	integer best = s, k;
+	float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum_i conj(x_i)*y_i over *n_ elements, taken with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum_i conj(x_i)*y_i over *n_ elements, taken with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum_i x_i*y_i (no conjugation) over *n_ elements, with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum_i x_i*y_i (no conjugation) over *n_ elements, with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b DGECON */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGECON + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgecon.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgecon.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgecon.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, */
+/*                          INFO ) */
+
+/*       CHARACTER          NORM */
+/*       INTEGER            INFO, LDA, N */
+/*       DOUBLE PRECISION   ANORM, RCOND */
+/*       INTEGER            IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGECON estimates the reciprocal of the condition number of a general */
+/* > real matrix A, in either the 1-norm or the infinity-norm, using */
+/* > the LU factorization computed by DGETRF. */
+/* > */
+/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */
+/* > condition number is computed as */
+/* >    RCOND = 1 / ( norm(A) * norm(inv(A)) ). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] NORM */
+/* > \verbatim */
+/* >          NORM is CHARACTER*1 */
+/* >          Specifies whether the 1-norm condition number or the */
+/* >          infinity-norm condition number is required: */
+/* >          = '1' or 'O':  1-norm; */
+/* >          = 'I':         Infinity-norm. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          The factors L and U from the factorization A = P*L*U */
+/* >          as computed by DGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ANORM */
+/* > \verbatim */
+/* >          ANORM is DOUBLE PRECISION */
+/* >          If NORM = '1' or 'O', the 1-norm of the original matrix A. */
+/* >          If NORM = 'I', the infinity-norm of the original matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >          The reciprocal of the condition number of the matrix A, */
+/* >          computed as RCOND = 1/(norm(A) * norm(inv(A))). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgecon_(char *norm, integer *n, doublereal *a, integer *
+	lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer *
+	iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1;
+    doublereal d__1;
+
+    /* Local variables */
+    integer kase, kase1;
+    doublereal scale;
+    extern logical lsame_(char *, char *);
+    integer isave[3];
+    extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, 
+	    integer *), dlacn2_(integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *);
+    doublereal sl;
+    integer ix;
+    extern integer idamax_(integer *, doublereal *, integer *);
+    doublereal su;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    doublereal ainvnm;
+    extern /* Subroutine */ int dlatrs_(char *, char *, char *, char *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *);
+    logical onenrm;
+    char normin[1];
+    doublereal smlnum;
+/*     Outline: validate the arguments, then call dlacn2_ repeatedly; each */
+/*     request it makes (kase != 0) is served by two dlatrs_ solves on WORK. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*     WORK(1:N) is the vector exchanged with dlacn2_ and WORK(N+1:2N) its */
+/*     second array; WORK(2N+1:) goes to the L solves, WORK(3N+1:) to the U solves. */
+/*  ===================================================================== */
+/*     The function always returns 0; the status is reported through *info */
+/*     and the estimate through *rcond. */
+/*     Test the input parameters. */
+
+    /* Parameter adjustments: shift the pointers so a, work and iwork can be indexed from 1 */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
+    if (! onenrm && ! lsame_(norm, "I")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -4;
+    } else if (*anorm < 0.) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGECON", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible; RCOND stays 0 on every early exit except N = 0 */
+
+    *rcond = 0.;
+    if (*n == 0) {
+	*rcond = 1.;
+	return 0;
+    } else if (*anorm == 0.) {
+	return 0;
+    }
+
+    smlnum = dlamch_("Safe minimum");
+
+/*     Estimate the norm of inv(A). */
+/*     normin is 'N' for the first pair of solves and 'Y' for all later ones. */
+    ainvnm = 0.;
+    *(unsigned char *)normin = 'N';
+    if (onenrm) {
+	kase1 = 1;
+    } else {
+	kase1 = 2;
+    }
+    kase = 0;
+L10:
+    dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave);
+    if (kase != 0) {
+	if (kase == kase1) {
+
+/*           Multiply by inv(L). */
+
+	    dlatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], 
+		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);
+
+/*           Multiply by inv(U). */
+
+	    dlatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[
+		    a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info);
+	} else {
+
+/*           Multiply by inv(U**T). */
+
+	    dlatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset],
+		     lda, &work[1], &su, &work[*n * 3 + 1], info);
+
+/*           Multiply by inv(L**T). */
+
+	    dlatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], 
+		    lda, &work[1], &sl, &work[(*n << 1) + 1], info);
+	}
+
+/*        Rescale X by SCALE = SL*SU via drscl_, unless SCALE is zero or below */
+/*        max|X(i)|*SMLNUM; in that case leave through L20 with RCOND = 0. */
+	scale = sl * su;
+	*(unsigned char *)normin = 'Y';
+	if (scale != 1.) {
+	    ix = idamax_(n, &work[1], &c__1);
+	    if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) 
+		    {
+		goto L20;
+	    }
+	    drscl_(n, &scale, &work[1], &c__1);
+	}
+	goto L10;
+    }
+
+/*     Compute the estimate of the reciprocal condition number. */
+/*     (RCOND remains 0 when the estimate AINVNM is 0.) */
+    if (ainvnm != 0.) {
+	*rcond = 1. / ainvnm / *anorm;
+    }
+
+L20:
+    return 0;
+
+/*     End of DGECON */
+
+} /* dgecon_ */
+
diff --git a/lapack-netlib/SRC/dgeequ.c b/lapack-netlib/SRC/dgeequ.c
new file mode 100644
index 000000000..504c79619
--- /dev/null
+++ b/lapack-netlib/SRC/dgeequ.c
@@ -0,0 +1,733 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* the integer type used by the translated routines (plain C int) */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;	/* single-precision real */
+typedef double doublereal;	/* double-precision real */
+typedef struct { real r, i; } complex;	/* single-precision complex as a (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex as a (real, imaginary) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* view the struct storage as a C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* view the struct storage as a C99 complex */
+#define pCf(z) (*_pCf(z))	/* lvalue form of _pCf, used to store a complex result through z */
+#define pCd(z) (*_pCd(z))	/* lvalue form of _pCd, used to store a complex result through z */
+typedef int logical;	/* truth value; see TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)	/* value of a true `logical` */
+#define FALSE_ (0)	/* value of a false `logical` */
+
+/* Extern expands to `extern` unless it was already defined (for use with -E) */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: scalar types used by the descriptor structs that follow */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the length members of the descriptors (ofnmlen, infilen, ...) */
+typedef int ftnint;
+
+/* cilist: descriptor for external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: descriptor for internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: descriptor for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: descriptor for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: descriptor for rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: descriptor for inquire; each char* member is followed by its length member */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): the only length member typed ftnint rather than ftnlen (both are int) */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+/* Multitype overlays one member of each scalar type declared above. */
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+/* Vardesc describes one variable: a name, an address, a dims pointer and a type code. */
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+/* Namelist groups nvars Vardesc pointers under one name. */
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* type-generic; x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* arguments are evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* arguments are evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)	/* bit b of a, as 0 or 1 */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))	/* a with bit b cleared */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))	/* a with bit b set */
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): calls ccos (double) on a float complex; the other c_* macros use the f-suffixed functions */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* NOTE(review): conj (double) applied to a float complex */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference: a-b when a > b, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* NOTE(review): cimag (double) applied to a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* natural log scaled by a constant (presumably log10(e)) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* round to nearest, halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| when b >= 0, else -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* the length is whatever the caller passed as n; s is ignored */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}	/* NOTE(review): B is dereferenced directly while A goes through Cd() - confirm intended */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }	/* concatenate *np pieces into lpp (capacity llp), then pad with blanks */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies up to min(C,D) chars, stops at NUL, adds no padding */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}	/* NOTE(review): expands to a braced expression with no semicolon */
+#define myhuge(w) {HUGE_VAL}	/* NOTE(review): expands to a braced expression with no semicolon */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* NOTE(review): braced, no semicolon; always dispatches to dmaxloc_ */
+
+/* procedure parameter types for -A and -C++ */
+/* L_fp: pointer to a function returning logical; the argument list is left unspecified. */
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	float result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex float result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value overflows. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;	/* current exponent bit is set */
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) { /* integer x**n; for n < 0 the result is the truncated integer value */
+	integer result = 1; unsigned long int u;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* |x| >= 2 truncates to 0; x == 0 still performs the integer division 1/x, exactly as before. */
+		return x == 0 ? 1/x : 0;
+	}
+	/* Here n > 0, or x == -1 with n < 0, where only the parity of |n| matters. */
+	/* |n| is formed in unsigned arithmetic so the most negative n does not overflow on negation. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	for (;;) {
+		if (u & 01) result *= x;
+		if (u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, relative to s, of the first maximum among w[s-1..e-1]; n is never read. */
+	integer best = s, k;
+	double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, relative to s, of the first maximum among w[s-1..e-1]; n is never read. */
+	integer best = s, k;
+	float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum_i conj(x_i)*y_i over *n_ elements, taken with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum_i conj(x_i)*y_i over *n_ elements, taken with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum_i x_i*y_i (no conjugation) over *n_ elements, with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum_i x_i*y_i (no conjugation) over *n_ elements, with strides *incx_ and *incy_, in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS start rule: with a negative stride begin at offset (1-n)*inc and walk back to 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEEQU */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEEQU + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeequ.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeequ.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeequ.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
+/*                          INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
+/*       DOUBLE PRECISION   A( LDA, * ), C( * ), R( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEEQU computes row and column scalings intended to equilibrate an */
+/* > M-by-N matrix A and reduce its condition number.  R returns the row */
+/* > scale factors and C the column scale factors, chosen to try to make */
+/* > the largest element in each row and column of the matrix B with */
+/* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */
+/* > */
+/* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */
+/* > number and BIGNUM = largest safe number.  Use of these scaling */
+/* > factors is not guaranteed to reduce the condition number of A but */
+/* > works well in practice. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          The M-by-N matrix whose equilibration factors are */
+/* >          to be computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (M) */
+/* >          If INFO = 0 or INFO > M, R contains the row scale factors */
+/* >          for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >          If INFO = 0,  C contains the column scale factors for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ROWCND */
+/* > \verbatim */
+/* >          ROWCND is DOUBLE PRECISION */
+/* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
+/* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
+/* >          AMAX is neither too large nor too small, it is not worth */
+/* >          scaling by R. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] COLCND */
+/* > \verbatim */
+/* >          COLCND is DOUBLE PRECISION */
+/* >          If INFO = 0, COLCND contains the ratio of the smallest */
+/* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
+/* >          worth scaling by C. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] AMAX */
+/* > \verbatim */
+/* >          AMAX is DOUBLE PRECISION */
+/* >          Absolute value of largest matrix element.  If AMAX is very */
+/* >          close to overflow or very close to underflow, the matrix */
+/* >          should be scaled. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i,  and i is */
+/* >                <= M:  the i-th row of A is exactly zero */
+/* >                >  M:  the (i-M)-th column of A is exactly zero */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgeequ_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal 
+	*colcnd, doublereal *amax, integer *info)
+{
+    /* f2c-generated temporaries: leading dimension and base offset of A, loop bounds, MIN/MAX operands */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1, d__2, d__3;
+
+    /* Local variables */
+    integer i__, j;
+    doublereal rcmin, rcmax;
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    doublereal bignum, smlnum;
+/*  Fills R and C with clamped reciprocals of the row / row-scaled column maxima of A, sets */
+/*  ROWCND, COLCND and AMAX, and reports status through INFO.  The C return value is always 0. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters (after the index adjustments below). */
+
+    /* Shift the base pointers so a[i + j*a_dim1], r__[i] and c__[j] take 1-based indices. */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --r__;
+    --c__;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);	/* xerbla_ is handed the positive argument position */
+	xerbla_("DGEEQU", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return for an empty matrix: R and C are not written. */
+
+    if (*m == 0 || *n == 0) {
+	*rowcnd = 1.;
+	*colcnd = 1.;
+	*amax = 0.;
+	return 0;
+    }
+
+/*     Get machine constants: SMLNUM = DLAMCH('S'), BIGNUM = 1 / SMLNUM. */
+
+    smlnum = dlamch_("S");
+    bignum = 1. / smlnum;
+
+/*     Compute row scale factors: start by clearing R(1:M). */
+
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__[i__] = 0.;
+/* L10: */
+    }
+
+/*     Find the maximum element in each row: R(i) = max over j of |A(i,j)|. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
+	    r__[i__] = f2cmax(d__2,d__3);
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*     Find the largest and smallest row maximum (RCMAX, RCMIN). */
+
+    rcmin = bignum;
+    rcmax = 0.;
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__1 = rcmax, d__2 = r__[i__];
+	rcmax = f2cmax(d__1,d__2);
+/* Computing MIN */
+	d__1 = rcmin, d__2 = r__[i__];
+	rcmin = f2cmin(d__1,d__2);
+/* L40: */
+    }
+    *amax = rcmax;	/* largest |A(i,j)| over the whole matrix */
+
+    if (rcmin == 0.) {
+
+/*        Find the first zero scale factor and return an error code. */
+/*        ROWCND, COLCND and C are not assigned on this path; R holds the raw row maxima. */
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (r__[i__] == 0.) {
+		*info = i__;
+		return 0;
+	    }
+/* L50: */
+	}
+    } else {
+
+/*        Invert the scale factors: R(i) = 1 / min( max( R(i), SMLNUM ), BIGNUM ). */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MIN */
+/* Computing MAX */
+	    d__2 = r__[i__];
+	    d__1 = f2cmax(d__2,smlnum);
+	    r__[i__] = 1. / f2cmin(d__1,bignum);
+/* L60: */
+	}
+
+/*        Compute ROWCND = min(R(I)) / max(R(I)), using the same clamping as above. */
+
+	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+/*     Compute column scale factors: start by clearing C(1:N). */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	c__[j] = 0.;
+/* L70: */
+    }
+
+/*     Find the maximum element in each column, */
+/*     assuming the row scaling computed above: C(j) = max over i of |A(i,j)| * R(i). */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * 
+		    r__[i__];
+	    c__[j] = f2cmax(d__2,d__3);
+/* L80: */
+	}
+/* L90: */
+    }
+
+/*     Find the largest and smallest column maximum (RCMAX, RCMIN are reused). */
+
+    rcmin = bignum;
+    rcmax = 0.;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	d__1 = rcmin, d__2 = c__[j];
+	rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+	d__1 = rcmax, d__2 = c__[j];
+	rcmax = f2cmax(d__1,d__2);
+/* L100: */
+    }
+
+    if (rcmin == 0.) {
+
+/*        Find the first zero scale factor and return an error code (INFO = M + j). */
+/*        COLCND is not assigned on this path; R and ROWCND are already final. */
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (c__[j] == 0.) {
+		*info = *m + j;
+		return 0;
+	    }
+/* L110: */
+	}
+    } else {
+
+/*        Invert the scale factors: C(j) = 1 / min( max( C(j), SMLNUM ), BIGNUM ). */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+/* Computing MAX */
+	    d__2 = c__[j];
+	    d__1 = f2cmax(d__2,smlnum);
+	    c__[j] = 1. / f2cmin(d__1,bignum);
+/* L120: */
+	}
+
+/*        Compute COLCND = min(C(J)) / max(C(J)), using the same clamping as above. */
+
+	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+    return 0;
+
+/*     End of DGEEQU */
+
+} /* dgeequ_ */
+
diff --git a/lapack-netlib/SRC/dgeequb.c b/lapack-netlib/SRC/dgeequb.c
new file mode 100644
index 000000000..49397083a
--- /dev/null
+++ b/lapack-netlib/SRC/dgeequb.c
@@ -0,0 +1,753 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| in unsigned arithmetic: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| in unsigned arithmetic: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* complex x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| in unsigned arithmetic: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* complex x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| in unsigned arithmetic: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n.  For n < 0 the result is 1 when x == 1, +1 or -1 (by parity of n) */
+	/* when x == -1, and 0 for every other non-zero x. */
+	integer pow; unsigned long int u = (unsigned long int)n;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* NOTE(review): x == 0 with n < 0 still evaluates the integer division 1/0, as before. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		u = 0UL - u; /* x == -1: |n| in unsigned arithmetic; "n = -n" overflows for the most negative n */
+	}
+	for(pow = 1; ; ) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry of w[s-1..e-1]; n is not used */
+	integer k, best = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry of w[s-1..e-1]; n is not used */
+	integer k, best = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of conjg(x(i)) * y(i) for n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS starts a negatively-strided vector at its far end, element (1-n)*inc, */
+	/* so that every index stays inside the array; a non-negative stride starts at element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+	} else {
+		for (i=0;i<n;i++) zdotc += conjf(Cf(&x[ix + i*incx])) * Cf(&y[iy + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of conjg(x(i)) * y(i) for n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS starts a negatively-strided vector at its far end, element (1-n)*inc, */
+	/* so that every index stays inside the array; a non-negative stride starts at element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+	} else {
+		for (i=0;i<n;i++) zdotc += conj(Cd(&x[ix + i*incx])) * Cd(&y[iy + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i), with no conjugation, for n elements with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS starts a negatively-strided vector at its far end, element (1-n)*inc, */
+	/* so that every index stays inside the array; a non-negative stride starts at element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += Cf(&x[i]) * Cf(&y[i]);
+	} else {
+		for (i=0;i<n;i++) zdotc += Cf(&x[ix + i*incx]) * Cf(&y[iy + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i), with no conjugation, for n elements with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS starts a negatively-strided vector at its far end, element (1-n)*inc, */
+	/* so that every index stays inside the array; a non-negative stride starts at element 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += Cd(&x[i]) * Cd(&y[i]);
+	} else {
+		for (i=0;i<n;i++) zdotc += Cd(&x[ix + i*incx]) * Cd(&y[iy + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEEQUB */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEEQUB + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeequb
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeequb
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeequb
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */
+/*                           INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   AMAX, COLCND, ROWCND */
+/*       DOUBLE PRECISION   A( LDA, * ), C( * ), R( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEEQUB computes row and column scalings intended to equilibrate an */
+/* > M-by-N matrix A and reduce its condition number.  R returns the row */
+/* > scale factors and C the column scale factors, chosen to try to make */
+/* > the largest element in each row and column of the matrix B with */
+/* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */
+/* > the radix. */
+/* > */
+/* > R(i) and C(j) are restricted to be a power of the radix between */
+/* > SMLNUM = smallest safe number and BIGNUM = largest safe number.  Use */
+/* > of these scaling factors is not guaranteed to reduce the condition */
+/* > number of A but works well in practice. */
+/* > */
+/* > This routine differs from DGEEQU by restricting the scaling factors */
+/* > to a power of the radix.  Barring over- and underflow, scaling by */
+/* > these factors introduces no additional rounding errors.  However, the */
+/* > scaled entries' magnitudes are no longer approximately 1 but lie */
+/* > between sqrt(radix) and 1/sqrt(radix). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          The M-by-N matrix whose equilibration factors are */
+/* >          to be computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (M) */
+/* >          If INFO = 0 or INFO > M, R contains the row scale factors */
+/* >          for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >          If INFO = 0,  C contains the column scale factors for A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ROWCND */
+/* > \verbatim */
+/* >          ROWCND is DOUBLE PRECISION */
+/* >          If INFO = 0 or INFO > M, ROWCND contains the ratio of the */
+/* >          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and */
+/* >          AMAX is neither too large nor too small, it is not worth */
+/* >          scaling by R. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] COLCND */
+/* > \verbatim */
+/* >          COLCND is DOUBLE PRECISION */
+/* >          If INFO = 0, COLCND contains the ratio of the smallest */
+/* >          C(i) to the largest C(i).  If COLCND >= 0.1, it is not */
+/* >          worth scaling by C. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] AMAX */
+/* > \verbatim */
+/* >          AMAX is DOUBLE PRECISION */
+/* >          Absolute value of largest matrix element.  If AMAX is very */
+/* >          close to overflow or very close to underflow, the matrix */
+/* >          should be scaled. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i,  and i is */
+/* >                <= M:  the i-th row of A is exactly zero */
+/* >                >  M:  the (i-M)-th column of A is exactly zero */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgeequb_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal 
+	*colcnd, doublereal *amax, integer *info)
+{
+    /* f2c-generated temporaries: leading dimension and base offset of A, loop bounds, MIN/MAX operands */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1, d__2, d__3;
+
+    /* Local variables */
+    integer i__, j;
+    doublereal radix, rcmin, rcmax;
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    doublereal bignum, logrdx, smlnum;
+/*  Same flow as DGEEQU, except that each row / column maximum is first replaced by a power */
+/*  of the radix before it is clamped and inverted.  The C return value is always 0. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters (after the index adjustments below). */
+
+    /* Shift the base pointers so a[i + j*a_dim1], r__[i] and c__[j] take 1-based indices. */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --r__;
+    --c__;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);	/* xerbla_ is handed the positive argument position */
+	xerbla_("DGEEQUB", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return for an empty matrix: R and C are not written. */
+
+    if (*m == 0 || *n == 0) {
+	*rowcnd = 1.;
+	*colcnd = 1.;
+	*amax = 0.;
+	return 0;
+    }
+
+/*     Get machine constants.  Assume SMLNUM is a power of the radix. */
+
+    smlnum = dlamch_("S");
+    bignum = 1. / smlnum;
+    radix = dlamch_("B");
+    logrdx = log(radix);	/* divisor that turns a natural log into a log to base RADIX */
+
+/*     Compute row scale factors: start by clearing R(1:M). */
+
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__[i__] = 0.;
+/* L10: */
+    }
+
+/*     Find the maximum element in each row: R(i) = max over j of |A(i,j)|. */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    d__2 = r__[i__], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
+	    r__[i__] = f2cmax(d__2,d__3);
+/* L20: */
+	}
+/* L30: */
+    }
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (r__[i__] > 0.) {	/* zero rows stay 0 so the check below can report them */
+	    i__2 = (integer) (log(r__[i__]) / logrdx);	/* exponent; the cast truncates toward zero */
+	    r__[i__] = pow_di(&radix, &i__2);	/* R(i) = RADIX**exponent */
+	}
+    }
+
+/*     Find the largest and smallest (radix-rounded) row maximum (RCMAX, RCMIN). */
+
+    rcmin = bignum;
+    rcmax = 0.;
+    i__1 = *m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__1 = rcmax, d__2 = r__[i__];
+	rcmax = f2cmax(d__1,d__2);
+/* Computing MIN */
+	d__1 = rcmin, d__2 = r__[i__];
+	rcmin = f2cmin(d__1,d__2);
+/* L40: */
+    }
+    *amax = rcmax;	/* NOTE: R already holds radix powers here, so this is the rounded maximum */
+
+    if (rcmin == 0.) {
+
+/*        Find the first zero scale factor and return an error code. */
+/*        ROWCND, COLCND and C are not assigned on this path. */
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (r__[i__] == 0.) {
+		*info = i__;
+		return 0;
+	    }
+/* L50: */
+	}
+    } else {
+
+/*        Invert the scale factors: R(i) = 1 / min( max( R(i), SMLNUM ), BIGNUM ). */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MIN */
+/* Computing MAX */
+	    d__2 = r__[i__];
+	    d__1 = f2cmax(d__2,smlnum);
+	    r__[i__] = 1. / f2cmin(d__1,bignum);
+/* L60: */
+	}
+
+/*        Compute ROWCND = min(R(I)) / max(R(I)), using the same clamping as above. */
+
+	*rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+/*     Compute column scale factors: start by clearing C(1:N). */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	c__[j] = 0.;
+/* L70: */
+    }
+
+/*     Find the maximum element in each column, */
+/*     assuming the row scaling computed above: C(j) = max over i of |A(i,j)| * R(i). */
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    d__2 = c__[j], d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)) * 
+		    r__[i__];
+	    c__[j] = f2cmax(d__2,d__3);
+/* L80: */
+	}
+	if (c__[j] > 0.) {	/* zero columns stay 0 so the check below can report them */
+	    i__2 = (integer) (log(c__[j]) / logrdx);	/* i__2 reused as exponent; reset to *m next pass */
+	    c__[j] = pow_di(&radix, &i__2);	/* C(j) = RADIX**exponent */
+	}
+/* L90: */
+    }
+
+/*     Find the largest and smallest column value (RCMAX, RCMIN are reused). */
+
+    rcmin = bignum;
+    rcmax = 0.;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	d__1 = rcmin, d__2 = c__[j];
+	rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+	d__1 = rcmax, d__2 = c__[j];
+	rcmax = f2cmax(d__1,d__2);
+/* L100: */
+    }
+
+    if (rcmin == 0.) {
+
+/*        Find the first zero scale factor and return an error code (INFO = M + j). */
+/*        COLCND is not assigned on this path; R and ROWCND are already final. */
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (c__[j] == 0.) {
+		*info = *m + j;
+		return 0;
+	    }
+/* L110: */
+	}
+    } else {
+
+/*        Invert the scale factors: C(j) = 1 / min( max( C(j), SMLNUM ), BIGNUM ). */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+/* Computing MAX */
+	    d__2 = c__[j];
+	    d__1 = f2cmax(d__2,smlnum);
+	    c__[j] = 1. / f2cmin(d__1,bignum);
+/* L120: */
+	}
+
+/*        Compute COLCND = min(C(J)) / max(C(J)), using the same clamping as above. */
+
+	*colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+    }
+
+    return 0;
+
+/*     End of DGEEQUB */
+
+} /* dgeequb_ */
+
diff --git a/lapack-netlib/SRC/dgees.c b/lapack-netlib/SRC/dgees.c
new file mode 100644
index 000000000..7fdac4a7d
--- /dev/null
+++ b/lapack-netlib/SRC/dgees.c
@@ -0,0 +1,1002 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* type of the Fortran INTEGER arguments of dgees_ (N, LDA, INFO, ...) */
+typedef unsigned int uinteger;	/* unsigned type used by the bit_clear/bit_set macros */
+typedef char *address;
+typedef short int shortint;
+typedef float real;	/* single-precision scalar */
+typedef double doublereal;	/* type of the DOUBLE PRECISION arguments of dgees_ (A, WR, WI, ...) */
+typedef struct { real r, i; } complex;	/* r = real part, i = imaginary part (see Cf) */
+typedef struct { doublereal r, i; } doublecomplex;	/* r = real part, i = imaginary part (see Cd) */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed as C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed as C99 complex */
+#define pCf(z) (*_pCf(z))	/* lvalue: lets macros assign a C99 complex result into a complex struct */
+#define pCd(z) (*_pCd(z))	/* lvalue: lets macros assign a C99 complex result into a doublecomplex struct */
+typedef int logical;	/* type of the Fortran LOGICAL values (BWORK, result of SELECT) */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: parameter blocks for Fortran I/O statements (the struct types are not referenced by dgees_) */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the string-length arguments, e.g. (ftnlen)6 in the ilaenv_/xerbla_ calls */
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; most string fields are paired with a length field of a similar name */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type declared in this header */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; (disabled: no longint type is declared in this header) */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: one entry of Namelist.vars */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus an array of nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* libf2c runtime entry points expressed as macros over <math.h>/<complex.h>; arguments are pointers, as in libf2c. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* Results are stored through pCf/pCd because a C99 complex value cannot be assigned to the struct types directly. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimagf(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* B is a doublecomplex pointer: convert it like A, cpow takes C99 complex */
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* The three value-producing helpers here are parenthesized expressions so they can appear on the right of an assignment. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++: L_fp points to a function returning logical (the SELECT argument of dgees_) */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();	/* no prototype: dgees_ calls it with two doublereal pointers */
+#endif
+
+/* x**n by binary exponentiation; n < 0 yields (1/x)**|n| and n == 0 yields 1. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* x**n by binary exponentiation; n < 0 yields (1/x)**|n| and n == 0 yields 1. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Complex x**n by binary exponentiation; n < 0 yields (1/x)**|n| and n == 0 yields 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Complex x**n by binary exponentiation; n < 0 yields (1/x)**|n| and n == 0 yields 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (u != 0) {
+		if (u & 01) result *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Integer x**n; a negative exponent gives the integer-truncated reciprocal power. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int u;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* |x| >= 2 truncates to 0; x == 0 keeps the deliberate division by zero. */
+		return x == 0 ? 1/x : 0;
+	}
+	/* Here n > 0, or x == -1 with n < 0; |n| is formed in unsigned arithmetic so it cannot overflow. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	while (u != 0) {
+		if (u & 01) result *= x;
+		if (u >>= 1) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k) {
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	}
+	return best - s + 1;
+}
+/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k) {
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	}
+	return best - s + 1;
+}
+/* *z = sum of conjg(x(i))*y(i), i = 1..n. A negative increment walks its vector backwards from the far end, as in BLAS. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* First element touched: offset 0 for a forward stride, (1-n)*inc for a backward one, so indices stay >= 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+/* *z = sum of conjg(x(i))*y(i), i = 1..n. A negative increment walks its vector backwards from the far end, as in BLAS. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* First element touched: offset 0 for a forward stride, (1-n)*inc for a backward one, so indices stay >= 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+/* *z = sum of x(i)*y(i), i = 1..n, without conjugation. A negative increment walks its vector backwards, as in BLAS. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* First element touched: offset 0 for a forward stride, (1-n)*inc for a backward one, so indices stay >= 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotu = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotu += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotu;
+}
+/* *z = sum of x(i)*y(i), i = 1..n, without conjugation. A negative increment walks its vector backwards, as in BLAS. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* First element touched: offset 0 for a forward stride, (1-n)*inc for a backward one, so indices stay >= 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotu = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotu += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values: integer literals that dgees_ passes by address to the routines it calls */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;	/* passed as LWORK = -1 to dhseqr_ for the workspace query, and to ilaenv_ */
+
+/* > \brief <b> DGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors
+for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEES + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgees.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgees.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgees.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, */
+/*                         VS, LDVS, WORK, LWORK, BWORK, INFO ) */
+
+/*       CHARACTER          JOBVS, SORT */
+/*       INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM */
+/*       LOGICAL            BWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */
+/*      $                   WR( * ) */
+/*       LOGICAL            SELECT */
+/*       EXTERNAL           SELECT */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEES computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */
+/* > Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T). */
+/* > */
+/* > Optionally, it also orders the eigenvalues on the diagonal of the */
+/* > real Schur form so that selected eigenvalues are at the top left. */
+/* > The leading columns of Z then form an orthonormal basis for the */
+/* > invariant subspace corresponding to the selected eigenvalues. */
+/* > */
+/* > A matrix is in real Schur form if it is upper quasi-triangular with */
+/* > 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the */
+/* > form */
+/* >         [  a  b  ] */
+/* >         [  c  a  ] */
+/* > */
+/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVS */
+/* > \verbatim */
+/* >          JOBVS is CHARACTER*1 */
+/* >          = 'N': Schur vectors are not computed; */
+/* >          = 'V': Schur vectors are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SORT */
+/* > \verbatim */
+/* >          SORT is CHARACTER*1 */
+/* >          Specifies whether or not to order the eigenvalues on the */
+/* >          diagonal of the Schur form. */
+/* >          = 'N': Eigenvalues are not ordered; */
+/* >          = 'S': Eigenvalues are ordered (see SELECT). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SELECT */
+/* > \verbatim */
+/* >          SELECT is a LOGICAL FUNCTION of two DOUBLE PRECISION arguments */
+/* >          SELECT must be declared EXTERNAL in the calling subroutine. */
+/* >          If SORT = 'S', SELECT is used to select eigenvalues to sort */
+/* >          to the top left of the Schur form. */
+/* >          If SORT = 'N', SELECT is not referenced. */
+/* >          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */
+/* >          SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex */
+/* >          conjugate pair of eigenvalues is selected, then both complex */
+/* >          eigenvalues are selected. */
+/* >          Note that a selected complex eigenvalue may no longer */
+/* >          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */
+/* >          ordering may change the value of complex eigenvalues */
+/* >          (especially if the eigenvalue is ill-conditioned); in this */
+/* >          case INFO is set to N+2 (see INFO below). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A has been overwritten by its real Schur form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SDIM */
+/* > \verbatim */
+/* >          SDIM is INTEGER */
+/* >          If SORT = 'N', SDIM = 0. */
+/* >          If SORT = 'S', SDIM = number of eigenvalues (after sorting) */
+/* >                         for which SELECT is true. (Complex conjugate */
+/* >                         pairs for which SELECT is true for either */
+/* >                         eigenvalue count as 2.) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is DOUBLE PRECISION array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, */
+/* >          respectively, of the computed eigenvalues in the same order */
+/* >          that they appear on the diagonal of the output Schur form T. */
+/* >          Complex conjugate pairs of eigenvalues will appear */
+/* >          consecutively with the eigenvalue having the positive */
+/* >          imaginary part first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VS */
+/* > \verbatim */
+/* >          VS is DOUBLE PRECISION array, dimension (LDVS,N) */
+/* >          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */
+/* >          vectors. */
+/* >          If JOBVS = 'N', VS is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVS */
+/* > \verbatim */
+/* >          LDVS is INTEGER */
+/* >          The leading dimension of the array VS.  LDVS >= 1; if */
+/* >          JOBVS = 'V', LDVS >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,3*N). */
+/* >          For good performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          Not referenced if SORT = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0: if INFO = i, and i is */
+/* >             <= N: the QR algorithm failed to compute all the */
+/* >                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */
+/* >                   contain those eigenvalues which have converged; if */
+/* >                   JOBVS = 'V', VS contains the matrix which reduces A */
+/* >                   to its partially converged Schur form. */
+/* >             = N+1: the eigenvalues could not be reordered because some */
+/* >                   eigenvalues were too close to separate (the problem */
+/* >                   is very ill-conditioned); */
+/* >             = N+2: after reordering, roundoff changed values of some */
+/* >                   complex eigenvalues so that leading eigenvalues in */
+/* >                   the Schur form no longer satisfy SELECT=.TRUE.  This */
+/* >                   could also be caused by underflow due to scaling. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine DGEES: eigenvalues and real Schur form of A, optionally Schur vectors and eigenvalue ordering */ int dgees_(char *jobvs, char *sort, L_fp select, integer *n, 
+	doublereal *a, integer *lda, integer *sdim, doublereal *wr, 
+	doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work, 
+	integer *lwork, logical *bwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer ibal;
+    doublereal anrm;
+    integer idum[1], ierr, itau, iwrk, inxt, i__;
+    doublereal s;
+    integer icond, ieval;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *), dswap_(integer *, doublereal *, integer 
+	    *, doublereal *, integer *);
+    logical cursl;
+    integer i1, i2;
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_(
+	    char *, char *, integer *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), 
+	    dgebal_(char *, integer *, doublereal *, integer *, integer *, 
+	    integer *, doublereal *, integer *);
+    logical lst2sl, scalea;
+    integer ip;
+    doublereal cscale;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dlascl_(char *, integer *, integer *, doublereal *, 
+	    doublereal *, integer *, integer *, doublereal *, integer *, 
+	    integer *), dlacpy_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    doublereal bignum;
+    extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dhseqr_(char *, char *, integer *, integer *, integer 
+	    *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dtrsen_(char *, char *, logical *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, integer *, integer *, integer *);
+    logical lastsl;
+    integer minwrk, maxwrk;
+    doublereal smlnum;
+    integer hswork;
+    logical wantst, lquery, wantvs;
+    integer ihi, ilo;
+    doublereal dum[1], eps, sep;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: shift the base pointers so the arrays below are indexed from 1, column-major */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vs_dim1 = *ldvs;
+    vs_offset = 1 + vs_dim1 * 1;
+    vs -= vs_offset;
+    --work;
+    --bwork;
+
+    /* Function Body: INFO = -k reports that the k-th argument is invalid */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvs = lsame_(jobvs, "V");
+    wantst = lsame_(sort, "S");
+    if (! wantvs && ! lsame_(jobvs, "N")) {
+	*info = -1;
+    } else if (! wantst && ! lsame_(sort, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldvs < 1 || wantvs && *ldvs < *n) {
+	*info = -11;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by DHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case.) */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    minwrk = 1;
+	    maxwrk = 1;
+	} else {
+	    maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, 
+		    n, &c__0, (ftnlen)6, (ftnlen)1);
+	    minwrk = *n * 3;
+
+	    dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1]
+		    , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval);
+	    hswork = (integer) work[1];
+
+	    if (! wantvs) {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	}
+	work[1] = (doublereal) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEES ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible (N = 0: only SDIM is set) */
+
+    if (*n == 0) {
+	*sdim = 0;
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("P");
+    smlnum = dlamch_("S");
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0. && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Permute the matrix to make it more nearly triangular */
+/*     (Workspace: need N) */
+
+    ibal = 1;
+    dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*     Reduce to upper Hessenberg form */
+/*     (Workspace: need 3*N, prefer 2*N+N*NB) */
+
+    itau = *n + ibal;
+    iwrk = *n + itau;
+    i__1 = *lwork - iwrk + 1;
+    dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvs) {
+
+/*        Copy Householder vectors to VS */
+
+	dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs)
+		;
+
+/*        Generate orthogonal matrix in VS */
+/*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+    }
+
+    *sdim = 0;
+
+/*     Perform QR iteration, accumulating Schur vectors in VS if desired */
+/*     (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+    iwrk = itau;
+    i__1 = *lwork - iwrk + 1;
+    dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[
+	    vs_offset], ldvs, &work[iwrk], &i__1, &ieval);
+    if (ieval > 0) {
+	*info = ieval;
+    }
+
+/*     Sort eigenvalues if desired; WR/WI are rescaled first so SELECT is applied to unscaled values */
+
+    if (wantst && *info == 0) {
+	if (scalea) {
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, &
+		    ierr);
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, &
+		    ierr);
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    bwork[i__] = (*select)(&wr[i__], &wi[i__]);
+/* L10: */
+	}
+
+/*        Reorder eigenvalues and transform Schur vectors */
+/*        (Workspace: none needed) */
+
+	i__1 = *lwork - iwrk + 1;
+	dtrsen_("N", jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], 
+		ldvs, &wr[1], &wi[1], sdim, &s, &sep, &work[iwrk], &i__1, 
+		idum, &c__1, &icond);
+	if (icond > 0) {
+	    *info = *n + icond;
+	}
+    }
+
+    if (wantvs) {
+
+/*        Undo balancing */
+/*        (Workspace: need N) */
+
+	dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs,
+		 &ierr);
+    }
+
+    if (scalea) {
+
+/*        Undo scaling for the Schur form of A */
+
+	dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, &
+		ierr);
+	i__1 = *lda + 1;
+	dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1);
+	if (cscale == smlnum) {
+
+/*           If scaling back towards underflow, adjust WI if an */
+/*           offdiagonal element of a 2-by-2 block in the Schur form */
+/*           underflows. */
+
+	    if (ieval > 0) {
+		i1 = ieval + 1;
+		i2 = ihi - 1;
+		i__1 = ilo - 1;
+/* Computing MAX */
+		i__3 = ilo - 1;
+		i__2 = f2cmax(i__3,1);
+		dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[
+			1], &i__2, &ierr);
+	    } else if (wantst) {
+		i1 = 1;
+		i2 = *n - 1;
+	    } else {
+		i1 = ilo;
+		i2 = ihi - 1;
+	    }
+	    inxt = i1 - 1;
+	    i__1 = i2;
+	    for (i__ = i1; i__ <= i__1; ++i__) {
+		if (i__ < inxt) {
+		    goto L20;
+		}
+		if (wi[i__] == 0.) {
+		    inxt = i__ + 1;
+		} else {
+		    if (a[i__ + 1 + i__ * a_dim1] == 0.) {
+			wi[i__] = 0.;
+			wi[i__ + 1] = 0.;
+		    } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + (
+			    i__ + 1) * a_dim1] == 0.) {
+			wi[i__] = 0.;
+			wi[i__ + 1] = 0.;
+			if (i__ > 1) {
+			    i__2 = i__ - 1;
+			    dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[(
+				    i__ + 1) * a_dim1 + 1], &c__1);
+			}
+			if (*n > i__ + 1) {
+			    i__2 = *n - i__ - 1;
+			    dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, &
+				    a[i__ + 1 + (i__ + 2) * a_dim1], lda);
+			}
+			if (wantvs) {
+			    dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ 
+				    + 1) * vs_dim1 + 1], &c__1);
+			}
+			a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * 
+				a_dim1];
+			a[i__ + 1 + i__ * a_dim1] = 0.;
+		    }
+		    inxt = i__ + 2;
+		}
+L20:
+		;
+	    }
+	}
+
+/*        Undo scaling for the imaginary part of the eigenvalues */
+
+	i__1 = *n - ieval;
+/* Computing MAX */
+	i__3 = *n - ieval;
+	i__2 = f2cmax(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + 
+		1], &i__2, &ierr);
+    }
+
+    if (wantst && *info == 0) {
+
+/*        Check if reordering successful: re-run SELECT, recount SDIM, INFO = N+2 if a selected value follows an unselected one */
+
+	lastsl = TRUE_;
+	lst2sl = TRUE_;
+	*sdim = 0;
+	ip = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    cursl = (*select)(&wr[i__], &wi[i__]);
+	    if (wi[i__] == 0.) {
+		if (cursl) {
+		    ++(*sdim);
+		}
+		ip = 0;
+		if (cursl && ! lastsl) {
+		    *info = *n + 2;
+		}
+	    } else {
+		if (ip == 1) {
+
+/*                 Last eigenvalue of conjugate pair */
+
+		    cursl = cursl || lastsl;
+		    lastsl = cursl;
+		    if (cursl) {
+			*sdim += 2;
+		    }
+		    ip = -1;
+		    if (cursl && ! lst2sl) {
+			*info = *n + 2;
+		    }
+		} else {
+
+/*                 First eigenvalue of conjugate pair */
+
+		    ip = 1;
+		}
+	    }
+	    lst2sl = lastsl;
+	    lastsl = cursl;
+/* L30: */
+	}
+    }
+
+    work[1] = (doublereal) maxwrk;	/* return the optimal LWORK in WORK(1) */
+    return 0;
+
+/*     End of DGEES */
+
+} /* dgees_ */
+
diff --git a/lapack-netlib/SRC/dgeesx.c b/lapack-netlib/SRC/dgeesx.c
new file mode 100644
index 000000000..4bf01895b
--- /dev/null
+++ b/lapack-netlib/SRC/dgeesx.c
@@ -0,0 +1,1124 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* f2c complex struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* f2c doublecomplex struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* reinterpret the struct pointer as _Complex float* */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* reinterpret the struct pointer as _Complex double* */
+#define pCf(z) (*_pCf(z)) /* lvalue form of _pCf, used by the macros below to store results */
+#define pCd(z) (*_pCd(z)) /* lvalue form of _pCd, used by the macros below to store results */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* Raise x to the integer power n by repeated squaring; negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0f;
+	/* Take |n| in unsigned arithmetic: plain -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 01) result *= x;	/* current exponent bit is set */
+		if(u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Raise x to the integer power n by repeated squaring; negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* Take |n| in unsigned arithmetic: plain -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 01) result *= x;	/* current exponent bit is set */
+		if(u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Raise complex x to the integer power n by repeated squaring; negative n uses 1/x. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* Take |n| in unsigned arithmetic: plain -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 01) result *= x;	/* current exponent bit is set */
+		if(u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Raise complex x to the integer power n by repeated squaring; negative n uses 1/x. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* Take |n| in unsigned arithmetic: plain -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 01) result *= x;	/* current exponent bit is set */
+		if(u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Integer power x**n with an integer result.  For a negative exponent the */
+/* result is 1 when x == 1, +-1 when x == -1, and 0 for any other nonzero x. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	/* Take |n| in unsigned arithmetic: plain -n overflows for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* x == 0 still evaluates 1/x, exactly as the code did before this rewrite */
+		return x == 0 ? 1/x : 0;
+	}
+	for ( ; u != 0; u >>= 1) {
+		if(u & 01) result *= x;	/* current exponent bit is set */
+		if(u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry of w(s..e); n is unused */
+	double top = w[s-1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry of w(s..e); n is unused */
+	float top = w[s-1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum over k = 0..n-1 of conj(x_k) * y_k, with BLAS-style strides. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards, */
+	/* so its first logical element sits at offset (1-n)*inc from the pointer. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + conjg(x(i)) * y(i) */
+		acc += conjf(Cf(&x[ix + i*incx])) * Cf(&y[iy + i*incy]);
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product: *z = sum over k = 0..n-1 of conj(x_k) * y_k, with BLAS-style strides. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards, */
+	/* so its first logical element sits at offset (1-n)*inc from the pointer. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + dconjg(x(i)) * y(i) */
+		acc += conj(Cd(&x[ix + i*incx])) * Cd(&y[iy + i*incy]);
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product: *z = sum over k = 0..n-1 of x_k * y_k, with BLAS-style strides. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards, */
+	/* so its first logical element sits at offset (1-n)*inc from the pointer. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + x(i) * y(i)  -- neither operand is conjugated */
+		acc += Cf(&x[ix + i*incx]) * Cf(&y[iy + i*incy]);
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product: *z = sum over k = 0..n-1 of x_k * y_k, with BLAS-style strides. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment walks that vector backwards, */
+	/* so its first logical element sits at offset (1-n)*inc from the pointer. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + x(i) * y(i)  -- neither operand is conjugated */
+		acc += Cd(&x[ix + i*incx]) * Cd(&y[iy + i*incy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief <b> DGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors 
+for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEESX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeesx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeesx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeesx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, */
+/*                          WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, */
+/*                          IWORK, LIWORK, BWORK, INFO ) */
+
+/*       CHARACTER          JOBVS, SENSE, SORT */
+/*       INTEGER            INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM */
+/*       DOUBLE PRECISION   RCONDE, RCONDV */
+/*       LOGICAL            BWORK( * ) */
+/*       INTEGER            IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */
+/*      $                   WR( * ) */
+/*       LOGICAL            SELECT */
+/*       EXTERNAL           SELECT */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEESX computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */
+/* > Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T). */
+/* > */
+/* > Optionally, it also orders the eigenvalues on the diagonal of the */
+/* > real Schur form so that selected eigenvalues are at the top left; */
+/* > computes a reciprocal condition number for the average of the */
+/* > selected eigenvalues (RCONDE); and computes a reciprocal condition */
+/* > number for the right invariant subspace corresponding to the */
+/* > selected eigenvalues (RCONDV).  The leading columns of Z form an */
+/* > orthonormal basis for this invariant subspace. */
+/* > */
+/* > For further explanation of the reciprocal condition numbers RCONDE */
+/* > and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where */
+/* > these quantities are called s and sep respectively). */
+/* > */
+/* > A real matrix is in real Schur form if it is upper quasi-triangular */
+/* > with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in */
+/* > the form */
+/* >           [  a  b  ] */
+/* >           [  c  a  ] */
+/* > */
+/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVS */
+/* > \verbatim */
+/* >          JOBVS is CHARACTER*1 */
+/* >          = 'N': Schur vectors are not computed; */
+/* >          = 'V': Schur vectors are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SORT */
+/* > \verbatim */
+/* >          SORT is CHARACTER*1 */
+/* >          Specifies whether or not to order the eigenvalues on the */
+/* >          diagonal of the Schur form. */
+/* >          = 'N': Eigenvalues are not ordered; */
+/* >          = 'S': Eigenvalues are ordered (see SELECT). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SELECT */
+/* > \verbatim */
+/* >          SELECT is a LOGICAL FUNCTION of two DOUBLE PRECISION arguments */
+/* >          SELECT must be declared EXTERNAL in the calling subroutine. */
+/* >          If SORT = 'S', SELECT is used to select eigenvalues to sort */
+/* >          to the top left of the Schur form. */
+/* >          If SORT = 'N', SELECT is not referenced. */
+/* >          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */
+/* >          SELECT(WR(j),WI(j)) is true; i.e., if either one of a */
+/* >          complex conjugate pair of eigenvalues is selected, then both */
+/* >          are.  Note that a selected complex eigenvalue may no longer */
+/* >          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */
+/* >          ordering may change the value of complex eigenvalues */
+/* >          (especially if the eigenvalue is ill-conditioned); in this */
+/* >          case INFO may be set to N+2 (see INFO below). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SENSE */
+/* > \verbatim */
+/* >          SENSE is CHARACTER*1 */
+/* >          Determines which reciprocal condition numbers are computed. */
+/* >          = 'N': None are computed; */
+/* >          = 'E': Computed for average of selected eigenvalues only; */
+/* >          = 'V': Computed for selected right invariant subspace only; */
+/* >          = 'B': Computed for both. */
+/* >          If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA, N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A is overwritten by its real Schur form T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SDIM */
+/* > \verbatim */
+/* >          SDIM is INTEGER */
+/* >          If SORT = 'N', SDIM = 0. */
+/* >          If SORT = 'S', SDIM = number of eigenvalues (after sorting) */
+/* >                         for which SELECT is true. (Complex conjugate */
+/* >                         pairs for which SELECT is true for either */
+/* >                         eigenvalue count as 2.) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is DOUBLE PRECISION array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, respectively, */
+/* >          of the computed eigenvalues, in the same order that they */
+/* >          appear on the diagonal of the output Schur form T.  Complex */
+/* >          conjugate pairs of eigenvalues appear consecutively with the */
+/* >          eigenvalue having the positive imaginary part first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VS */
+/* > \verbatim */
+/* >          VS is DOUBLE PRECISION array, dimension (LDVS,N) */
+/* >          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */
+/* >          vectors. */
+/* >          If JOBVS = 'N', VS is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVS */
+/* > \verbatim */
+/* >          LDVS is INTEGER */
+/* >          The leading dimension of the array VS.  LDVS >= 1, and if */
+/* >          JOBVS = 'V', LDVS >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDE */
+/* > \verbatim */
+/* >          RCONDE is DOUBLE PRECISION */
+/* >          If SENSE = 'E' or 'B', RCONDE contains the reciprocal */
+/* >          condition number for the average of the selected eigenvalues. */
+/* >          Not referenced if SENSE = 'N' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDV */
+/* > \verbatim */
+/* >          RCONDV is DOUBLE PRECISION */
+/* >          If SENSE = 'V' or 'B', RCONDV contains the reciprocal */
+/* >          condition number for the selected right invariant subspace. */
+/* >          Not referenced if SENSE = 'N' or 'E'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,3*N). */
+/* >          Also, if SENSE = 'E' or 'V' or 'B', */
+/* >          LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of */
+/* >          selected eigenvalues computed by this routine.  Note that */
+/* >          N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only */
+/* >          returned if LWORK < max(1,3*N), but if SENSE = 'E' or 'V' or */
+/* >          'B' this may not be large enough. */
+/* >          For good performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates upper bounds on the optimal sizes of the */
+/* >          arrays WORK and IWORK, returns these values as the first */
+/* >          entries of the WORK and IWORK arrays, and no error messages */
+/* >          related to LWORK or LIWORK are issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
+/* >          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LIWORK */
+/* > \verbatim */
+/* >          LIWORK is INTEGER */
+/* >          The dimension of the array IWORK. */
+/* >          LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). */
+/* >          Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is */
+/* >          only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this */
+/* >          may not be large enough. */
+/* > */
+/* >          If LIWORK = -1, then a workspace query is assumed; the */
+/* >          routine only calculates upper bounds on the optimal sizes of */
+/* >          the arrays WORK and IWORK, returns these values as the first */
+/* >          entries of the WORK and IWORK arrays, and no error messages */
+/* >          related to LWORK or LIWORK are issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BWORK */
+/* > \verbatim */
+/* >          BWORK is LOGICAL array, dimension (N) */
+/* >          Not referenced if SORT = 'N'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0: if INFO = i, and i is */
+/* >             <= N: the QR algorithm failed to compute all the */
+/* >                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */
+/* >                   contain those eigenvalues which have converged; if */
+/* >                   JOBVS = 'V', VS contains the transformation which */
+/* >                   reduces A to its partially converged Schur form. */
+/* >             = N+1: the eigenvalues could not be reordered because some */
+/* >                   eigenvalues were too close to separate (the problem */
+/* >                   is very ill-conditioned); */
+/* >             = N+2: after reordering, roundoff changed values of some */
+/* >                   complex eigenvalues so that leading eigenvalues in */
+/* >                   the Schur form no longer satisfy SELECT=.TRUE.  This */
+/* >                   could also be caused by underflow due to scaling. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgeesx_(char *jobvs, char *sort, L_fp select, char *
+	sense, integer *n, doublereal *a, integer *lda, integer *sdim, 
+	doublereal *wr, doublereal *wi, doublereal *vs, integer *ldvs, 
+	doublereal *rconde, doublereal *rcondv, doublereal *work, integer *
+	lwork, integer *iwork, integer *liwork, logical *bwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer ibal;
+    doublereal anrm;
+    integer ierr, itau, iwrk, lwrk, inxt, i__, icond, ieval;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *), dswap_(integer *, doublereal *, integer 
+	    *, doublereal *, integer *);
+    logical cursl;
+    integer liwrk, i1, i2;
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_(
+	    char *, char *, integer *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), 
+	    dgebal_(char *, integer *, doublereal *, integer *, integer *, 
+	    integer *, doublereal *, integer *);
+    logical lst2sl, scalea;
+    integer ip;
+    doublereal cscale;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dlascl_(char *, integer *, integer *, doublereal *, 
+	    doublereal *, integer *, integer *, doublereal *, integer *, 
+	    integer *), dlacpy_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    doublereal bignum;
+    extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dhseqr_(char *, char *, integer *, integer *, integer 
+	    *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    logical wantsb;
+    extern /* Subroutine */ int dtrsen_(char *, char *, logical *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, integer *, integer *, integer *);
+    logical wantse, lastsl;
+    integer minwrk, maxwrk;
+    logical wantsn;
+    doublereal smlnum;
+    integer hswork;
+    logical wantst, lquery, wantsv, wantvs;
+    integer ihi, ilo;
+    doublereal dum[1], eps;
+/*     Outline: balance (DGEBAL), reduce to Hessenberg form (DGEHRD), run the */
+/*     QR iteration (DHSEQR), then optionally reorder the Schur form (DTRSEN). */
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: shift the pointers so 1-based Fortran indices work */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vs_dim1 = *ldvs;
+    vs_offset = 1 + vs_dim1 * 1;
+    vs -= vs_offset;
+    --work;
+    --iwork;
+    --bwork;
+
+    /* Function Body */
+    *info = 0;
+    wantvs = lsame_(jobvs, "V");
+    wantst = lsame_(sort, "S");
+    wantsn = lsame_(sense, "N");
+    wantse = lsame_(sense, "E");
+    wantsv = lsame_(sense, "V");
+    wantsb = lsame_(sense, "B");
+    lquery = *lwork == -1 || *liwork == -1; /* either one set to -1 requests a workspace query */
+/*     SENSE other than 'N' is rejected (INFO = -4) unless SORT = 'S' */
+    if (! wantvs && ! lsame_(jobvs, "N")) {
+	*info = -1;
+    } else if (! wantst && ! lsame_(sort, "N")) {
+	*info = -2;
+    } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! 
+	    wantsn) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldvs < 1 || wantvs && *ldvs < *n) {
+	*info = -12;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "RWorkspace:" describe the */
+/*       minimal amount of real workspace needed at that point in the */
+/*       code, as well as the preferred amount for good performance. */
+/*       IWorkspace refers to integer workspace. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by DHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case. */
+/*       If SENSE = 'E', 'V' or 'B', then the amount of workspace needed */
+/*       depends on SDIM, which is computed by the routine DTRSEN later */
+/*       in the code.) */
+
+    if (*info == 0) {
+	liwrk = 1;
+	if (*n == 0) {
+	    minwrk = 1;
+	    lwrk = 1;
+	} else {
+	    maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, 
+		    n, &c__0, (ftnlen)6, (ftnlen)1);
+	    minwrk = *n * 3;
+/*          Workspace query to DHSEQR (LWORK = -1); its answer comes back in WORK(1) */
+	    dhseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1]
+		    , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval);
+	    hswork = (integer) work[1];
+
+	    if (! wantvs) {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    lwrk = maxwrk;
+	    if (! wantsn) {
+/* Computing MAX */
+		i__1 = lwrk, i__2 = *n + *n * *n / 2;
+		lwrk = f2cmax(i__1,i__2);
+	    }
+	    if (wantsv || wantsb) {
+		liwrk = *n * *n / 4;
+	    }
+	}
+	iwork[1] = liwrk;
+	work[1] = (doublereal) lwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -16;
+	} else if (*liwork < 1 && ! lquery) {
+	    *info = -18;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEESX", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	*sdim = 0;
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("P");
+    smlnum = dlamch_("S");
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0. && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Permute the matrix to make it more nearly triangular */
+/*     (RWorkspace: need N) */
+
+    ibal = 1;
+    dgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*     Reduce to upper Hessenberg form */
+/*     (RWorkspace: need 3*N, prefer 2*N+N*NB) */
+
+    itau = *n + ibal;
+    iwrk = *n + itau;
+    i__1 = *lwork - iwrk + 1;
+    dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvs) {
+
+/*        Copy Householder vectors to VS */
+
+	dlacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs)
+		;
+
+/*        Generate orthogonal matrix in VS */
+/*        (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+    }
+
+    *sdim = 0;
+
+/*     Perform QR iteration, accumulating Schur vectors in VS if desired */
+/*     (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+    iwrk = itau; /* DHSEQR's workspace starts where TAU was stored */
+    i__1 = *lwork - iwrk + 1;
+    dhseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[
+	    vs_offset], ldvs, &work[iwrk], &i__1, &ieval);
+    if (ieval > 0) {
+	*info = ieval;
+    }
+
+/*     Sort eigenvalues if desired */
+
+    if (wantst && *info == 0) {
+	if (scalea) {
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, &
+		    ierr);
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, &
+		    ierr);
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    bwork[i__] = (*select)(&wr[i__], &wi[i__]);
+/* L10: */
+	}
+
+/*        Reorder eigenvalues, transform Schur vectors, and compute */
+/*        reciprocal condition numbers */
+/*        (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) */
+/*                     otherwise, need N ) */
+/*        (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) */
+/*                     otherwise, need 0 ) */
+
+	i__1 = *lwork - iwrk + 1;
+	dtrsen_(sense, jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset],
+		 ldvs, &wr[1], &wi[1], sdim, rconde, rcondv, &work[iwrk], &
+		i__1, &iwork[1], liwork, &icond);
+	if (! wantsn) {
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + (*sdim << 1) * (*n - *sdim);
+	    maxwrk = f2cmax(i__1,i__2);
+	}
+	if (icond == -15) {
+
+/*           Not enough real workspace */
+
+	    *info = -16;
+	} else if (icond == -17) {
+
+/*           Not enough integer workspace */
+
+	    *info = -18;
+	} else if (icond > 0) {
+
+/*           DTRSEN failed to reorder or to restore standard Schur form */
+
+	    *info = icond + *n;
+	}
+    }
+
+    if (wantvs) {
+
+/*        Undo balancing */
+/*        (RWorkspace: need N) */
+
+	dgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs,
+		 &ierr);
+    }
+
+    if (scalea) {
+
+/*        Undo scaling for the Schur form of A */
+
+	dlascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, &
+		ierr);
+	i__1 = *lda + 1; /* stride LDA+1 steps along the diagonal of A */
+	dcopy_(n, &a[a_offset], &i__1, &wr[1], &c__1);
+	if ((wantsv || wantsb) && *info == 0) {
+	    dum[0] = *rcondv;
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &
+		    c__1, &ierr);
+	    *rcondv = dum[0];
+	}
+	if (cscale == smlnum) {
+
+/*           If scaling back towards underflow, adjust WI if an */
+/*           offdiagonal element of a 2-by-2 block in the Schur form */
+/*           underflows. */
+
+	    if (ieval > 0) {
+		i1 = ieval + 1;
+		i2 = ihi - 1;
+		i__1 = ilo - 1;
+		dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[
+			1], n, &ierr);
+	    } else if (wantst) {
+		i1 = 1;
+		i2 = *n - 1;
+	    } else {
+		i1 = ilo;
+		i2 = ihi - 1;
+	    }
+	    inxt = i1 - 1;
+	    i__1 = i2;
+	    for (i__ = i1; i__ <= i__1; ++i__) {
+		if (i__ < inxt) { /* second index of a pair already handled below */
+		    goto L20;
+		}
+		if (wi[i__] == 0.) {
+		    inxt = i__ + 1;
+		} else {
+		    if (a[i__ + 1 + i__ * a_dim1] == 0.) {
+			wi[i__] = 0.;
+			wi[i__ + 1] = 0.;
+		    } else if (a[i__ + 1 + i__ * a_dim1] != 0. && a[i__ + (
+			    i__ + 1) * a_dim1] == 0.) {
+			wi[i__] = 0.;
+			wi[i__ + 1] = 0.;
+			if (i__ > 1) {
+			    i__2 = i__ - 1;
+			    dswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[(
+				    i__ + 1) * a_dim1 + 1], &c__1);
+			}
+			if (*n > i__ + 1) {
+			    i__2 = *n - i__ - 1;
+			    dswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, &
+				    a[i__ + 1 + (i__ + 2) * a_dim1], lda);
+			}
+			if (wantvs) {
+			    dswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ 
+				    + 1) * vs_dim1 + 1], &c__1);
+			}
+			a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * 
+				a_dim1];
+			a[i__ + 1 + i__ * a_dim1] = 0.;
+		    }
+		    inxt = i__ + 2;
+		}
+L20:
+		;
+	    }
+	}
+	i__1 = *n - ieval; /* rescale WI(IEVAL+1:N) from CSCALE back to ANRM */
+/* Computing MAX */
+	i__3 = *n - ieval;
+	i__2 = f2cmax(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + 
+		1], &i__2, &ierr);
+    }
+
+    if (wantst && *info == 0) {
+
+/*        Check if reordering successful */
+
+	lastsl = TRUE_;
+	lst2sl = TRUE_;
+	*sdim = 0;
+	ip = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    cursl = (*select)(&wr[i__], &wi[i__]);
+	    if (wi[i__] == 0.) {
+		if (cursl) {
+		    ++(*sdim);
+		}
+		ip = 0;
+		if (cursl && ! lastsl) {
+		    *info = *n + 2;
+		}
+	    } else {
+		if (ip == 1) {
+
+/*                 Last eigenvalue of conjugate pair */
+
+		    cursl = cursl || lastsl;
+		    lastsl = cursl;
+		    if (cursl) {
+			*sdim += 2;
+		    }
+		    ip = -1;
+		    if (cursl && ! lst2sl) {
+			*info = *n + 2;
+		    }
+		} else {
+
+/*                 First eigenvalue of conjugate pair */
+
+		    ip = 1;
+		}
+	    }
+	    lst2sl = lastsl;
+	    lastsl = cursl;
+/* L30: */
+	}
+    }
+
+    work[1] = (doublereal) maxwrk; /* report the workspace estimate in WORK(1) */
+    if (wantsv || wantsb) {
+/* Computing MAX */
+	i__1 = 1, i__2 = *sdim * (*n - *sdim);
+	iwork[1] = f2cmax(i__1,i__2);
+    } else {
+	iwork[1] = 1;
+    }
+
+    return 0;
+
+/*     End of DGEESX */
+
+} /* dgeesx_ */
+
diff --git a/lapack-netlib/SRC/dgeev.c b/lapack-netlib/SRC/dgeev.c
new file mode 100644
index 000000000..6ded77f0c
--- /dev/null
+++ b/lapack-netlib/SRC/dgeev.c
@@ -0,0 +1,1039 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* type of every INTEGER argument and local of dgeev_ below */
+typedef unsigned int uinteger;	/* unsigned counterpart; the bit_clear/bit_set macros cast to it */
+typedef char *address;
+typedef short int shortint;
+typedef float real;	/* single-precision scalar */
+typedef double doublereal;	/* double-precision scalar */
+typedef struct { real r, i; } complex;	/* single-precision complex stored as a (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex stored as a (real, imaginary) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* read *z as a C99 _Complex float value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* read *z as a C99 _Complex double value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed as _Complex float (pointer cast, no copy) */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed as _Complex double (pointer cast, no copy) */
+#define pCf(z) (*_pCf(z))	/* lvalue form of _pCf: lets the macros below assign a _Complex float into a complex */
+#define pCd(z) (*_pCd(z))	/* lvalue form of _pCd: lets the macros below assign a _Complex double into a doublecomplex */
+typedef int logical;	/* truth value; TRUE_ and FALSE_ are defined as 1 and 0 */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the trailing string-length arguments, e.g. the (ftnlen)6 passed to xerbla_ */
+typedef int ftnint;
+
+/* cilist: argument record for external read/write statements; dgeev_ in this file does not use it */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: argument record for internal read/write statements; not used by dgeev_ */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: argument record for open; not used by dgeev_ */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: argument record for close; not used by dgeev_ */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: argument record shared by rewind, backspace and endfile; not used by dgeev_ */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: argument record for inquire; not used by dgeev_ */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type of this header */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: one entry of Namelist.vars */
+	char *name;
+	char *addr;
+	ftnlen *dims;	/* presumably dimension information; its layout is not shown in this file */
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;	/* pointers to the Vardesc entries */
+	int nvars;	/* presumably the number of entries in vars */
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* takes over calls to <stdlib.h> abs(); the argument is expanded more than once */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* each argument is expanded twice: pass side-effect-free expressions */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* each argument is expanded twice: pass side-effect-free expressions */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): calls double-precision ccos, unlike the ...f calls in the neighbouring c_* macros */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* NOTE(review): single-precision operand goes through double-precision conj */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* NOTE(review): single-precision operand goes through double-precision cimag */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* rounds toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* rounds toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves rounded away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| carrying the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars, stops at a NUL in B, writes no padding */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored; the process exits with status 1 */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored; the process exits with status 0 */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";	/* version tag; no code in this file reads it */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* expands to a break statement including its ';' */
+#define mycycle() continue;	/* expands to a continue statement including its ';' */
+#define myceiling(w) {ceil(w)}	/* NOTE(review): expands to a brace-wrapped expression with no ';' - not usable as an ordinary operand or statement */
+#define myhuge(w) {HUGE_VAL}	/* NOTE(review): brace-wrapped like myceiling; the argument is ignored */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* NOTE(review): always the double-precision dmaxloc_; brace-wrapped with no ';' */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	float result=1.0; unsigned long int bits;	/* bits: exponent magnitude still to be consumed */
+	if(n != 0) {
+		if(n < 0) x = 1/x;
+		for(bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| taken unsigned: "-n" overflows for the most negative integer */
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	double result=1.0; unsigned long int bits;	/* bits: exponent magnitude still to be consumed */
+	if(n != 0) {
+		if(n < 0) x = 1/x;
+		for(bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| taken unsigned: "-n" overflows for the most negative integer */
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex float result=1.0; unsigned long int bits;	/* bits: exponent magnitude still to be consumed */
+	if(n != 0) {
+		if(n < 0) x = 1/x;
+		for(bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| taken unsigned: "-n" overflows for the most negative integer */
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {	/* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex double result=1.0; unsigned long int bits;	/* bits: exponent magnitude still to be consumed */
+	if(n != 0) {
+		if(n < 0) x = 1/x;
+		for(bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n; ; ) {	/* |n| taken unsigned: "-n" overflows for the most negative integer */
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; for n < 0 the result is the truncated integer value of 1/x**|n|. */
+	integer result = 1; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* |x| > 1 truncates to 0; x == 0 keeps the original integer 1/x (a division by zero) */
+		if (x != -1) return x == 0 ? 1/x : 0;
+	}
+	/* x == -1 with n < 0 lands here; |n| is taken unsigned because "-n" overflows for the most negative integer */
+	bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	for (;;) {
+		if (bits & 01) result *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	integer best = s, j;	/* best: 1-based slot of the largest value seen so far */
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > w[best-1]) best = j;	/* strict '>' keeps the first of equal maxima */
+	return best - s + 1;	/* position counted from s; n is never read */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	integer best = s, j;	/* best: 1-based slot of the largest value seen so far */
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > w[best-1]) best = j;	/* strict '>' keeps the first of equal maxima */
+	return best - s + 1;	/* position counted from s; n is never read */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum of conj(x_i) * y_i over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	/* Reference-BLAS rule: a negative stride starts at element (n-1)*|inc| and walks back, so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum of conj(x_i) * y_i over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	/* Reference-BLAS rule: a negative stride starts at element (n-1)*|inc| and walks back, so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum of x_i * y_i (no conjugation) over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	/* Reference-BLAS rule: a negative stride starts at element (n-1)*|inc| and walks back, so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum of x_i * y_i (no conjugation) over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	/* Reference-BLAS rule: a negative stride starts at element (n-1)*|inc| and walks back, so no index drops below 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief <b> DGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE
+matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEEV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeev.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeev.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeev.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, */
+/*                         LDVR, WORK, LWORK, INFO ) */
+
+/*       CHARACTER          JOBVL, JOBVR */
+/*       INTEGER            INFO, LDA, LDVL, LDVR, LWORK, N */
+/*       DOUBLE PRECISION   A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), */
+/*      $                   WI( * ), WORK( * ), WR( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEEV computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues and, optionally, the left and/or right eigenvectors. */
+/* > */
+/* > The right eigenvector v(j) of A satisfies */
+/* >                  A * v(j) = lambda(j) * v(j) */
+/* > where lambda(j) is its eigenvalue. */
+/* > The left eigenvector u(j) of A satisfies */
+/* >               u(j)**H * A = lambda(j) * u(j)**H */
+/* > where u(j)**H denotes the conjugate-transpose of u(j). */
+/* > */
+/* > The computed eigenvectors are normalized to have Euclidean norm */
+/* > equal to 1 and largest component real. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBVL */
+/* > \verbatim */
+/* >          JOBVL is CHARACTER*1 */
+/* >          = 'N': left eigenvectors of A are not computed; */
+/* >          = 'V': left eigenvectors of A are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVR */
+/* > \verbatim */
+/* >          JOBVR is CHARACTER*1 */
+/* >          = 'N': right eigenvectors of A are not computed; */
+/* >          = 'V': right eigenvectors of A are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A has been overwritten. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is DOUBLE PRECISION array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, */
+/* >          respectively, of the computed eigenvalues.  Complex */
+/* >          conjugate pairs of eigenvalues appear consecutively */
+/* >          with the eigenvalue having the positive imaginary part */
+/* >          first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VL */
+/* > \verbatim */
+/* >          VL is DOUBLE PRECISION array, dimension (LDVL,N) */
+/* >          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
+/* >          after another in the columns of VL, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVL = 'N', VL is not referenced. */
+/* >          If the j-th eigenvalue is real, then u(j) = VL(:,j), */
+/* >          the j-th column of VL. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */
+/* >          u(j+1) = VL(:,j) - i*VL(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the array VL.  LDVL >= 1; if */
+/* >          JOBVL = 'V', LDVL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VR */
+/* > \verbatim */
+/* >          VR is DOUBLE PRECISION array, dimension (LDVR,N) */
+/* >          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
+/* >          after another in the columns of VR, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVR = 'N', VR is not referenced. */
+/* >          If the j-th eigenvalue is real, then v(j) = VR(:,j), */
+/* >          the j-th column of VR. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */
+/* >          v(j+1) = VR(:,j) - i*VR(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the array VR.  LDVR >= 1; if */
+/* >          JOBVR = 'V', LDVR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,3*N), and */
+/* >          if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good */
+/* >          performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if INFO = i, the QR algorithm failed to compute all the */
+/* >                eigenvalues, and no eigenvectors have been computed; */
+/* >                elements i+1:N of WR and WI contain eigenvalues which */
+/* >                have converged. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/*  @precisions fortran d -> s */
+
+/* > \ingroup doubleGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *
+	a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, 
+	integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, 
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
+	    i__2, i__3;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    integer ibal;
+    char side[1];	/* SIDE argument for dtrevc3_: set to 'L', 'R' or 'B' below */
+    doublereal anrm;
+    integer ierr, itau;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *);
+    integer iwrk, nout;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    integer i__, k;
+    doublereal r__;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern doublereal dlapy2_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_(
+	    char *, char *, integer *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), 
+	    dgebal_(char *, integer *, doublereal *, integer *, integer *, 
+	    integer *, doublereal *, integer *);
+    doublereal cs;
+    logical scalea;
+    extern doublereal dlamch_(char *);
+    doublereal cscale;
+    extern doublereal dlange_(char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *);
+    extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *);
+    doublereal sn;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), 
+	    dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, 
+	    doublereal *), xerbla_(char *, integer *, ftnlen);
+    logical select[1];	/* SELECT argument for dtrevc3_; never written in this routine */
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    doublereal bignum;
+    extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dhseqr_(char *, char *, integer *, integer *, integer 
+	    *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    integer minwrk, maxwrk;
+    logical wantvl;
+    doublereal smlnum;
+    integer hswork;
+    logical lquery, wantvr;
+    extern /* Subroutine */ int dtrevc3_(char *, char *, logical *, integer *,
+	     doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, integer *, integer *, doublereal *, integer *, integer 
+	    *);
+    integer ihi;
+    doublereal scl;
+    integer ilo;
+    doublereal dum[1], eps;	/* dum: work argument handed to dlange_ */
+    integer lwork_trevc__;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;	/* rebase so that a[i + j * a_dim1] addresses A(i,j) with 1-based i, j */
+    --wr;	/* rebase the vectors to 1-based indexing as well */
+    --wi;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;	/* caller only asks for the optimal workspace size */
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -1;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldvl < 1 || wantvl && *ldvl < *n) {
+	*info = -9;
+    } else if (*ldvr < 1 || wantvr && *ldvr < *n) {
+	*info = -11;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by DHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case.) */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    minwrk = 1;
+	    maxwrk = 1;
+	} else {
+	    maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, 
+		    n, &c__0, (ftnlen)6, (ftnlen)1);	/* 2*N + N*NB for the Hessenberg reduction */
+	    if (wantvl) {
+		minwrk = *n << 2;	/* 4*N */
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+		dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vl[vl_offset], ldvl, &work[1], &c_n1, info);	/* lwork = -1: size query, answer comes back in work[1] */
+		hswork = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = *
+			n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+		dtrevc3_("L", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);	/* lwork = -1: size query, answer comes back in work[1] */
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n << 2;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else if (wantvr) {
+		minwrk = *n << 2;	/* 4*N */
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, 
+			"DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)
+			1);
+		maxwrk = f2cmax(i__1,i__2);
+		dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vr[vr_offset], ldvr, &work[1], &c_n1, info);	/* lwork = -1: size query, answer comes back in work[1] */
+		hswork = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = *
+			n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+		dtrevc3_("R", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);	/* lwork = -1: size query, answer comes back in work[1] */
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n << 2;
+		maxwrk = f2cmax(i__1,i__2);
+	    } else {
+		minwrk = *n * 3;
+		dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vr[vr_offset], ldvr, &work[1], &c_n1, info);	/* lwork = -1: size query, answer comes back in work[1] */
+		hswork = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = *
+			n + hswork;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    maxwrk = f2cmax(maxwrk,minwrk);
+	}
+	work[1] = (doublereal) maxwrk;	/* result of a workspace query */
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEEV ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("P");
+    smlnum = dlamch_("S");
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0. && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Balance the matrix */
+/*     (Workspace: need N) */
+
+    ibal = 1;
+    dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*     Reduce to upper Hessenberg form */
+/*     (Workspace: need 3*N, prefer 2*N+N*NB) */
+
+    itau = ibal + *n;	/* work[itau..] follows the N entries used by the balancing step */
+    iwrk = itau + *n;	/* remaining workspace starts after another N entries */
+    i__1 = *lwork - iwrk + 1;
+    dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvl) {
+
+/*        Want left eigenvectors */
+/*        Copy Householder vectors to VL */
+
+	*(unsigned char *)side = 'L';
+	dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*        Generate orthogonal matrix in VL */
+/*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VL */
+/*        (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+	iwrk = itau;	/* from here on the work[itau..] region is reused as general workspace */
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vl[vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*           Want left and right eigenvectors */
+/*           Copy Schur vectors to VR */
+
+	    *(unsigned char *)side = 'B';
+	    dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*        Want right eigenvectors */
+/*        Copy Householder vectors to VR */
+
+	*(unsigned char *)side = 'R';
+	dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*        Generate orthogonal matrix in VR */
+/*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VR */
+/*        (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+	iwrk = itau;	/* from here on the work[itau..] region is reused as general workspace */
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*        Compute eigenvalues only */
+/*        (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
+
+	iwrk = itau;	/* from here on the work[itau..] region is reused as general workspace */
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO .NE. 0 from DHSEQR, then quit */
+
+    if (*info != 0) {
+	goto L50;	/* skips the eigenvector steps; the scaling is still undone at L50 */
+    }
+
+    if (wantvl || wantvr) {
+
+/*        Compute left and/or right eigenvectors */
+/*        (Workspace: need 4*N, prefer N + N + 2*N*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dtrevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], 
+		ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, &
+		ierr);
+    }
+
+    if (wantvl) {
+
+/*        Undo balancing of left eigenvectors */
+/*        (Workspace: need N) */
+
+	dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl,
+		 &ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.) {	/* real eigenvalue: column i is the whole eigenvector */
+		scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.) {	/* first of a conjugate pair: columns i and i+1 hold real and imaginary parts */
+		d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		scl = 1. / dlapy2_(&d__1, &d__2);
+		dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    d__1 = vl[k + i__ * vl_dim1];
+/* Computing 2nd power */
+		    d__2 = vl[k + (i__ + 1) * vl_dim1];
+		    work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2;	/* squared modulus of component k */
+/* L10: */
+		}
+		k = idamax_(n, &work[iwrk], &c__1);	/* row holding the component of largest modulus */
+		dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], 
+			&cs, &sn, &r__);
+		drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * 
+			vl_dim1 + 1], &c__1, &cs, &sn);
+		vl[k + (i__ + 1) * vl_dim1] = 0.;	/* store an exact zero as the imaginary part of that component */
+	    }
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*        Undo balancing of right eigenvectors */
+/*        (Workspace: need N) */
+
+	dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr,
+		 &ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.) {	/* real eigenvalue: column i is the whole eigenvector */
+		scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.) {	/* first of a conjugate pair: columns i and i+1 hold real and imaginary parts */
+		d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		scl = 1. / dlapy2_(&d__1, &d__2);
+		dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    d__1 = vr[k + i__ * vr_dim1];
+/* Computing 2nd power */
+		    d__2 = vr[k + (i__ + 1) * vr_dim1];
+		    work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2;	/* squared modulus of component k */
+/* L30: */
+		}
+		k = idamax_(n, &work[iwrk], &c__1);	/* row holding the component of largest modulus */
+		dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], 
+			&cs, &sn, &r__);
+		drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * 
+			vr_dim1 + 1], &c__1, &cs, &sn);
+		vr[k + (i__ + 1) * vr_dim1] = 0.;	/* store an exact zero as the imaginary part of that component */
+	    }
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;	/* rescale entries info+1..n of WR (all of them when info == 0) */
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 
+		1], &i__2, &ierr);
+	i__1 = *n - *info;	/* same range for WI */
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 
+		1], &i__2, &ierr);
+	if (*info > 0) {	/* after a DHSEQR failure also rescale entries 1..ilo-1 */
+	    i__1 = ilo - 1;
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], 
+		    n, &ierr);
+	    i__1 = ilo - 1;
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], 
+		    n, &ierr);
+	}
+    }
+
+    work[1] = (doublereal) maxwrk;	/* hand the optimal LWORK back to the caller */
+    return 0;
+
+/*     End of DGEEV */
+
+} /* dgeev_ */
+
diff --git a/lapack-netlib/SRC/dgeevx.c b/lapack-netlib/SRC/dgeevx.c
new file mode 100644
index 000000000..1de111683
--- /dev/null
+++ b/lapack-netlib/SRC/dgeevx.c
@@ -0,0 +1,1206 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* integer type used by all translated routines in this file */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single-precision complex stored as (r, i); the <complex.h> `complex` macro is #undef'd before this point */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex stored as (r, i) */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed through a C99 complex pointer (cast only, no copy) */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed through a C99 complex pointer (cast only, no copy) */
+#define pCf(z) (*_pCf(z))	/* assignable C99-complex lvalue over the struct z points to */
+#define pCd(z) (*_pCd(z))	/* assignable C99-complex lvalue over the struct z points to */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern defaults to `extern` unless it was already defined before this header (original note: for use with -E) */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O parameter blocks: one struct per statement kind, named in the comment above each. None is referenced in the visible part of this file. */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; most char* members are paired with a length member (presumably the buffer length; confirm) */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points; one member per scalar type declared earlier in this header */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of variable descriptors */
+	char *name;
+	Vardesc **vars;	/* presumably nvars entries; confirm against the code that fills it */
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* function-like macro taking over the name abs after <stdlib.h>; x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* one argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* one argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }	/* sig_die (below) ignores its arguments and calls exit(1) */
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): double-precision ccos() on a float complex, unlike csinf/cexpf below; confirm intended */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores its result through pCf(), like the other c_* statement macros. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* NOTE(review): conj() rather than conjf() on a float complex; confirm intended */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference: a - b when a > b, otherwise 0.0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* NOTE(review): cimag() rather than cimagf() on a float complex; confirm intended */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* natural log scaled by a constant */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* natural log scaled by a constant */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves rounded away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| when b >= 0, otherwise -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* s is not used */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}	/* NOTE(review): B is dereferenced directly, not via Cd() like A; confirm the type callers pass */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars, stops at a NUL in B, does not pad A */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* statement macro: the ';' is part of the expansion */
+#define mycycle() continue;	/* statement macro: the ';' is part of the expansion */
+#define myceiling(w) (ceil(w))	/* parenthesized so it works in any expression, not only as a braced initializer */
+#define myhuge(w) (HUGE_VAL)	/* the argument is ignored */
+/* mymaxloc forwards to dmaxloc_ with s and e dereferenced and n passed through unchanged. */
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++ build: pointer to a function returning logical, variadic parameters */
+#else
+typedef logical (*L_fp)();	/* C build: pointer to a function returning logical, parameters unspecified */
+#endif
+
+/* Return x**n by repeated squaring; a negative n inverts the base (x = 1/x) and uses |n|. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0;
+	unsigned long int bits = (unsigned long int)n;	/* a negative n wraps modulo ULONG_MAX+1 */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }	/* bits = |n| without signed negation, so the most negative n cannot overflow */
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Return x**n by repeated squaring; a negative n inverts the base (x = 1/x) and uses |n|. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	unsigned long int bits = (unsigned long int)n;	/* a negative n wraps modulo ULONG_MAX+1 */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }	/* bits = |n| without signed negation, so the most negative n cannot overflow */
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Return the complex power x**n by repeated squaring; a negative n inverts the base (x = 1/x) and uses |n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	unsigned long int bits = (unsigned long int)n;	/* a negative n wraps modulo ULONG_MAX+1 */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }	/* bits = |n| without signed negation, so the most negative n cannot overflow */
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Return the complex power x**n by repeated squaring; a negative n inverts the base (x = 1/x) and uses |n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	unsigned long int bits = (unsigned long int)n;	/* a negative n wraps modulo ULONG_MAX+1 */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }	/* bits = |n| without signed negation, so the most negative n cannot overflow */
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Return x**n in integer arithmetic. For n < 0 the result is 1 (x == 1), +/-1 (x == -1) or 0 (|x| >= 2). */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int bits = (unsigned long int)n;	/* a negative n wraps modulo ULONG_MAX+1 */
+	if (n == 0 || (n < 0 && x == 1)) return 1;
+	if (n < 0) {
+		/* NOTE(review): x == 0 still evaluates 1/x (integer division by zero), unchanged here; presumably a deliberate fault, confirm. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		bits = 0UL - bits;	/* |n| via unsigned wrap: avoids overflowing -n at the most negative n */
+	}
+	for (;;) {
+		if (bits & 01) result *= x;
+		if (bits >>= 1) x *= x;	/* square only while exponent bits remain */
+		else break;
+	}
+	return result;
+}
+/* 1-based offset (relative to s) of the first maximum among w[s-1] .. w[e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, pos = s + 1;
+	double top = w[s - 1];
+	for (; pos <= e; ++pos) if (w[pos - 1] > top) { best = pos; top = w[pos - 1]; }
+	return best - s + 1;
+}
+/* 1-based offset (relative to s) of the first maximum among w[s-1] .. w[e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, pos = s + 1;
+	float top = w[s - 1];
+	for (; pos <= e; ++pos) if (w[pos - 1] > top) { best = pos; top = w[pos - 1]; }
+	return best - s + 1;
+}
+/* *z = sum over i of conj(x_i) * y_i, for *n_ elements of x and y taken with strides *incx_ and *incy_. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards from element (n-1)*|inc|, as the reference BLAS does, */
+	/* so no index before the start of the array is formed; non-negative strides start at 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over i of conj(x_i) * y_i, for *n_ elements of x and y taken with strides *incx_ and *incy_. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards from element (n-1)*|inc|, as the reference BLAS does, */
+	/* so no index before the start of the array is formed; non-negative strides start at 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* *z = sum over i of x_i * y_i (no conjugation), for *n_ elements taken with strides *incx_ and *incy_. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards from element (n-1)*|inc|, as the reference BLAS does, */
+	/* so no index before the start of the array is formed; non-negative strides start at 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* *z = sum over i of x_i * y_i (no conjugation), for *n_ elements taken with strides *incx_ and *incy_. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative stride walks its vector backwards from element (n-1)*|inc|, as the reference BLAS does, */
+	/* so no index before the start of the array is formed; non-negative strides start at 0. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief <b> DGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEEVX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeevx.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeevx.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeevx.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, */
+/*                          VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, */
+/*                          RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) */
+
+/*       CHARACTER          BALANC, JOBVL, JOBVR, SENSE */
+/*       INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N */
+/*       DOUBLE PRECISION   ABNRM */
+/*       INTEGER            IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), RCONDE( * ), RCONDV( * ), */
+/*      $                   SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), */
+/*      $                   WI( * ), WORK( * ), WR( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEEVX computes for an N-by-N real nonsymmetric matrix A, the */
+/* > eigenvalues and, optionally, the left and/or right eigenvectors. */
+/* > */
+/* > Optionally also, it computes a balancing transformation to improve */
+/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */
+/* > SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */
+/* > (RCONDE), and reciprocal condition numbers for the right */
+/* > eigenvectors (RCONDV). */
+/* > */
+/* > The right eigenvector v(j) of A satisfies */
+/* >                  A * v(j) = lambda(j) * v(j) */
+/* > where lambda(j) is its eigenvalue. */
+/* > The left eigenvector u(j) of A satisfies */
+/* >               u(j)**H * A = lambda(j) * u(j)**H */
+/* > where u(j)**H denotes the conjugate-transpose of u(j). */
+/* > */
+/* > The computed eigenvectors are normalized to have Euclidean norm */
+/* > equal to 1 and largest component real. */
+/* > */
+/* > Balancing a matrix means permuting the rows and columns to make it */
+/* > more nearly upper triangular, and applying a diagonal similarity */
+/* > transformation D * A * D**(-1), where D is a diagonal matrix, to */
+/* > make its rows and columns closer in norm and the condition numbers */
+/* > of its eigenvalues and eigenvectors smaller.  The computed */
+/* > reciprocal condition numbers correspond to the balanced matrix. */
+/* > Permuting rows and columns will not change the condition numbers */
+/* > (in exact arithmetic) but diagonal scaling will.  For further */
+/* > explanation of balancing, see section 4.10.2 of the LAPACK */
+/* > Users' Guide. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] BALANC */
+/* > \verbatim */
+/* >          BALANC is CHARACTER*1 */
+/* >          Indicates how the input matrix should be diagonally scaled */
+/* >          and/or permuted to improve the conditioning of its */
+/* >          eigenvalues. */
+/* >          = 'N': Do not diagonally scale or permute; */
+/* >          = 'P': Perform permutations to make the matrix more nearly */
+/* >                 upper triangular. Do not diagonally scale; */
+/* >          = 'S': Diagonally scale the matrix, i.e. replace A by */
+/* >                 D*A*D**(-1), where D is a diagonal matrix chosen */
+/* >                 to make the rows and columns of A more equal in */
+/* >                 norm. Do not permute; */
+/* >          = 'B': Both diagonally scale and permute A. */
+/* > */
+/* >          Computed reciprocal condition numbers will be for the matrix */
+/* >          after balancing and/or permuting. Permuting does not change */
+/* >          condition numbers (in exact arithmetic), but balancing does. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVL */
+/* > \verbatim */
+/* >          JOBVL is CHARACTER*1 */
+/* >          = 'N': left eigenvectors of A are not computed; */
+/* >          = 'V': left eigenvectors of A are computed. */
+/* >          If SENSE = 'E' or 'B', JOBVL must = 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVR */
+/* > \verbatim */
+/* >          JOBVR is CHARACTER*1 */
+/* >          = 'N': right eigenvectors of A are not computed; */
+/* >          = 'V': right eigenvectors of A are computed. */
+/* >          If SENSE = 'E' or 'B', JOBVR must = 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] SENSE */
+/* > \verbatim */
+/* >          SENSE is CHARACTER*1 */
+/* >          Determines which reciprocal condition numbers are computed. */
+/* >          = 'N': None are computed; */
+/* >          = 'E': Computed for eigenvalues only; */
+/* >          = 'V': Computed for right eigenvectors only; */
+/* >          = 'B': Computed for eigenvalues and right eigenvectors. */
+/* > */
+/* >          If SENSE = 'E' or 'B', both left and right eigenvectors */
+/* >          must also be computed (JOBVL = 'V' and JOBVR = 'V'). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A. */
+/* >          On exit, A has been overwritten.  If JOBVL = 'V' or */
+/* >          JOBVR = 'V', A contains the real Schur form of the balanced */
+/* >          version of the input matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WR */
+/* > \verbatim */
+/* >          WR is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WI */
+/* > \verbatim */
+/* >          WI is DOUBLE PRECISION array, dimension (N) */
+/* >          WR and WI contain the real and imaginary parts, */
+/* >          respectively, of the computed eigenvalues.  Complex */
+/* >          conjugate pairs of eigenvalues will appear consecutively */
+/* >          with the eigenvalue having the positive imaginary part */
+/* >          first. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VL */
+/* > \verbatim */
+/* >          VL is DOUBLE PRECISION array, dimension (LDVL,N) */
+/* >          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
+/* >          after another in the columns of VL, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVL = 'N', VL is not referenced. */
+/* >          If the j-th eigenvalue is real, then u(j) = VL(:,j), */
+/* >          the j-th column of VL. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */
+/* >          u(j+1) = VL(:,j) - i*VL(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVL */
+/* > \verbatim */
+/* >          LDVL is INTEGER */
+/* >          The leading dimension of the array VL.  LDVL >= 1; if */
+/* >          JOBVL = 'V', LDVL >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VR */
+/* > \verbatim */
+/* >          VR is DOUBLE PRECISION array, dimension (LDVR,N) */
+/* >          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
+/* >          after another in the columns of VR, in the same order */
+/* >          as their eigenvalues. */
+/* >          If JOBVR = 'N', VR is not referenced. */
+/* >          If the j-th eigenvalue is real, then v(j) = VR(:,j), */
+/* >          the j-th column of VR. */
+/* >          If the j-th and (j+1)-st eigenvalues form a complex */
+/* >          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */
+/* >          v(j+1) = VR(:,j) - i*VR(:,j+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVR */
+/* > \verbatim */
+/* >          LDVR is INTEGER */
+/* >          The leading dimension of the array VR.  LDVR >= 1, and if */
+/* >          JOBVR = 'V', LDVR >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* >          ILO and IHI are integer values determined when A was */
+/* >          balanced.  The balanced A(i,j) = 0 if I > J and */
+/* >          J = 1,...,ILO-1 or I = IHI+1,...,N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SCALE */
+/* > \verbatim */
+/* >          SCALE is DOUBLE PRECISION array, dimension (N) */
+/* >          Details of the permutations and scaling factors applied */
+/* >          when balancing A.  If P(j) is the index of the row and column */
+/* >          interchanged with row and column j, and D(j) is the scaling */
+/* >          factor applied to row and column j, then */
+/* >          SCALE(J) = P(J),    for J = 1,...,ILO-1 */
+/* >                   = D(J),    for J = ILO,...,IHI */
+/* >                   = P(J)     for J = IHI+1,...,N. */
+/* >          The order in which the interchanges are made is N to IHI+1, */
+/* >          then 1 to ILO-1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ABNRM */
+/* > \verbatim */
+/* >          ABNRM is DOUBLE PRECISION */
+/* >          The one-norm of the balanced matrix (the maximum */
+/* >          of the sum of absolute values of elements of any column). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDE */
+/* > \verbatim */
+/* >          RCONDE is DOUBLE PRECISION array, dimension (N) */
+/* >          RCONDE(j) is the reciprocal condition number of the j-th */
+/* >          eigenvalue. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCONDV */
+/* > \verbatim */
+/* >          RCONDV is DOUBLE PRECISION array, dimension (N) */
+/* >          RCONDV(j) is the reciprocal condition number of the j-th */
+/* >          right eigenvector. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.   If SENSE = 'N' or 'E', */
+/* >          LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */
+/* >          LWORK >= 3*N.  If SENSE = 'V' or 'B', LWORK >= N*(N+6). */
+/* >          For good performance, LWORK must generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (2*N-2) */
+/* >          If SENSE = 'N' or 'E', not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if INFO = i, the QR algorithm failed to compute all the */
+/* >                eigenvalues, and no eigenvectors or condition numbers */
+/* >                have been computed; elements 1:ILO-1 and i+1:N of WR */
+/* >                and WI contain eigenvalues which have converged. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/*  @precisions fortran d -> s */
+
+/* > \ingroup doubleGEeigen */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgeevx_(char *balanc, char *jobvl, char *jobvr, char *
+	sense, integer *n, doublereal *a, integer *lda, doublereal *wr, 
+	doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, 
+	integer *ldvr, integer *ilo, integer *ihi, doublereal *scale, 
+	doublereal *abnrm, doublereal *rconde, doublereal *rcondv, doublereal 
+	*work, integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
+	    i__2, i__3;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    char side[1];
+    doublereal anrm;
+    integer ierr, itau;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *);
+    integer iwrk, nout;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    integer i__, k;
+    doublereal r__;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    integer icond;
+    extern logical lsame_(char *, char *);
+    extern doublereal dlapy2_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_(
+	    char *, char *, integer *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), 
+	    dgebal_(char *, integer *, doublereal *, integer *, integer *, 
+	    integer *, doublereal *, integer *);
+    doublereal cs;
+    logical scalea;
+    extern doublereal dlamch_(char *);
+    doublereal cscale;
+    extern doublereal dlange_(char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *);
+    extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *);
+    doublereal sn;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), 
+	    dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, 
+	    doublereal *), xerbla_(char *, integer *, ftnlen);
+    logical select[1];
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    doublereal bignum;
+    extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dhseqr_(char *, char *, integer *, integer *, integer 
+	    *, doublereal *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dtrsna_(char *, char *, logical *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *, 
+	    doublereal *, integer *, integer *, integer *);
+    integer minwrk, maxwrk;
+    logical wantvl, wntsnb;
+    integer hswork;
+    logical wntsne;
+    doublereal smlnum;
+    logical lquery, wantvr, wntsnn, wntsnv;
+    extern /* Subroutine */ int dtrevc3_(char *, char *, logical *, integer *,
+	     doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, integer *, integer *, doublereal *, integer *, integer 
+	    *);
+    char job[1];
+    doublereal scl, dum[1], eps;
+    integer lwork_trevc__;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1 * 1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1 * 1;
+    vr -= vr_offset;
+    --scale;
+    --rconde;
+    --rcondv;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    wntsnn = lsame_(sense, "N");
+    wntsne = lsame_(sense, "E");
+    wntsnv = lsame_(sense, "V");
+    wntsnb = lsame_(sense, "B");
+    if (! (lsame_(balanc, "N") || lsame_(balanc, "S") || lsame_(balanc, "P") 
+	    || lsame_(balanc, "B"))) {
+	*info = -1;
+    } else if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -2;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -3;
+    } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) 
+	    && ! (wantvl && wantvr)) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldvl < 1 || wantvl && *ldvl < *n) {
+	*info = -11;
+    } else if (*ldvr < 1 || wantvr && *ldvr < *n) {
+	*info = -13;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+/*       HSWORK refers to the workspace preferred by DHSEQR, as */
+/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
+/*       the worst case.) */
+
+    if (*info == 0) {
+	if (*n == 0) {
+	    minwrk = 1;
+	    maxwrk = 1;
+	} else {
+	    maxwrk = *n + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, &
+		    c__0, (ftnlen)6, (ftnlen)1);
+
+	    if (wantvl) {
+		dtrevc3_("L", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+		dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vl[vl_offset], ldvl, &work[1], &c_n1, info);
+	    } else if (wantvr) {
+		dtrevc3_("R", "B", select, n, &a[a_offset], lda, &vl[
+			vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &
+			work[1], &c_n1, &ierr);
+		lwork_trevc__ = (integer) work[1];
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_trevc__;
+		maxwrk = f2cmax(i__1,i__2);
+		dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
+			1], &vr[vr_offset], ldvr, &work[1], &c_n1, info);
+	    } else {
+		if (wntsnn) {
+		    dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], 
+			    &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, 
+			    info);
+		} else {
+		    dhseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], 
+			    &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, 
+			    info);
+		}
+	    }
+	    hswork = (integer) work[1];
+
+	    if (! wantvl && ! wantvr) {
+		minwrk = *n << 1;
+		if (! wntsnn) {
+/* Computing MAX */
+		    i__1 = minwrk, i__2 = *n * *n + *n * 6;
+		    minwrk = f2cmax(i__1,i__2);
+		}
+		maxwrk = f2cmax(maxwrk,hswork);
+		if (! wntsnn) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *n * *n + *n * 6;
+		    maxwrk = f2cmax(i__1,i__2);
+		}
+	    } else {
+		minwrk = *n * 3;
+		if (! wntsnn && ! wntsne) {
+/* Computing MAX */
+		    i__1 = minwrk, i__2 = *n * *n + *n * 6;
+		    minwrk = f2cmax(i__1,i__2);
+		}
+		maxwrk = f2cmax(maxwrk,hswork);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "DORGHR",
+			 " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		maxwrk = f2cmax(i__1,i__2);
+		if (! wntsnn && ! wntsne) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *n * *n + *n * 6;
+		    maxwrk = f2cmax(i__1,i__2);
+		}
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    maxwrk = f2cmax(maxwrk,minwrk);
+	}
+	work[1] = (doublereal) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -21;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEEVX", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("P");
+    smlnum = dlamch_("S");
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    icond = 0;
+    anrm = dlange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0. && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*     Balance the matrix and compute ABNRM */
+
+    dgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr);
+    *abnrm = dlange_("1", n, n, &a[a_offset], lda, dum);
+    if (scalea) {
+	dum[0] = *abnrm;
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, &
+		ierr);
+	*abnrm = dum[0];
+    }
+
+/*     Reduce to upper Hessenberg form */
+/*     (Workspace: need 2*N, prefer N+N*NB) */
+
+    itau = 1;
+    iwrk = itau + *n;
+    i__1 = *lwork - iwrk + 1;
+    dgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &
+	    ierr);
+
+    if (wantvl) {
+
+/*        Want left eigenvectors */
+/*        Copy Householder vectors to VL */
+
+	*(unsigned char *)side = 'L';
+	dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*        Generate orthogonal matrix in VL */
+/*        (Workspace: need 2*N-1, prefer N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], &
+		i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VL */
+/*        (Workspace: need 1, prefer HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[
+		vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*           Want left and right eigenvectors */
+/*           Copy Schur vectors to VR */
+
+	    *(unsigned char *)side = 'B';
+	    dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*        Want right eigenvectors */
+/*        Copy Householder vectors to VR */
+
+	*(unsigned char *)side = 'R';
+	dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*        Generate orthogonal matrix in VR */
+/*        (Workspace: need 2*N-1, prefer N+(N-1)*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &
+		i__1, &ierr);
+
+/*        Perform QR iteration, accumulating Schur vectors in VR */
+/*        (Workspace: need 1, prefer HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*        Compute eigenvalues only */
+/*        If condition numbers desired, compute Schur form */
+
+	if (wntsnn) {
+	    *(unsigned char *)job = 'E';
+	} else {
+	    *(unsigned char *)job = 'S';
+	}
+
+/*        (Workspace: need 1, prefer HSWORK (see comments) ) */
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO .NE. 0 from DHSEQR, then quit */
+
+    if (*info != 0) {
+	goto L50;
+    }
+
+    if (wantvl || wantvr) {
+
+/*        Compute left and/or right eigenvectors */
+/*        (Workspace: need 3*N, prefer N + 2*N*NB) */
+
+	i__1 = *lwork - iwrk + 1;
+	dtrevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], 
+		ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, &
+		ierr);
+    }
+
+/*     Compute condition numbers if desired */
+/*     (Workspace: need N*N+6*N unless SENSE = 'E') */
+
+    if (! wntsnn) {
+	dtrsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], 
+		ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, 
+		&work[iwrk], n, &iwork[1], &icond);
+    }
+
+    if (wantvl) {
+
+/*        Undo balancing of left eigenvectors */
+
+	dgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, 
+		&ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.) {
+		scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.) {
+		d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		scl = 1. / dlapy2_(&d__1, &d__2);
+		dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    d__1 = vl[k + i__ * vl_dim1];
+/* Computing 2nd power */
+		    d__2 = vl[k + (i__ + 1) * vl_dim1];
+		    work[k] = d__1 * d__1 + d__2 * d__2;
+/* L10: */
+		}
+		k = idamax_(n, &work[1], &c__1);
+		dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], 
+			&cs, &sn, &r__);
+		drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * 
+			vl_dim1 + 1], &c__1, &cs, &sn);
+		vl[k + (i__ + 1) * vl_dim1] = 0.;
+	    }
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*        Undo balancing of right eigenvectors */
+
+	dgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, 
+		&ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.) {
+		scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.) {
+		d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		scl = 1. / dlapy2_(&d__1, &d__2);
+		dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    d__1 = vr[k + i__ * vr_dim1];
+/* Computing 2nd power */
+		    d__2 = vr[k + (i__ + 1) * vr_dim1];
+		    work[k] = d__1 * d__1 + d__2 * d__2;
+/* L30: */
+		}
+		k = idamax_(n, &work[1], &c__1);
+		dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], 
+			&cs, &sn, &r__);
+		drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * 
+			vr_dim1 + 1], &c__1, &cs, &sn);
+		vr[k + (i__ + 1) * vr_dim1] = 0.;
+	    }
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 
+		1], &i__2, &ierr);
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = f2cmax(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 
+		1], &i__2, &ierr);
+	if (*info == 0) {
+	    if ((wntsnv || wntsnb) && icond == 0) {
+		dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[
+			1], n, &ierr);
+	    }
+	} else {
+	    i__1 = *ilo - 1;
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], 
+		    n, &ierr);
+	    i__1 = *ilo - 1;
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], 
+		    n, &ierr);
+	}
+    }
+
+    work[1] = (doublereal) maxwrk;
+    return 0;
+
+/*     End of DGEEVX */
+
+} /* dgeevx_ */
+
diff --git a/lapack-netlib/SRC/dgehd2.c b/lapack-netlib/SRC/dgehd2.c
new file mode 100644
index 000000000..27e90b1e7
--- /dev/null
+++ b/lapack-netlib/SRC/dgehd2.c
@@ -0,0 +1,628 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of the f2c struct *z as a C99 float complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of the f2c struct *z as a C99 double complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same address, viewed as a C99 float complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same address, viewed as a C99 double complex */
+#define pCf(z) (*_pCf(z)) /* assignable C99 view of the f2c complex that z points to */
+#define pCd(z) (*_pCd(z)) /* assignable C99 view of the f2c doublecomplex that z points to */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* R = A**B; as in libf2c's pow_zz, B is a doublecomplex pointer like A, so it must go through Cd() too */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by repeated squaring (single precision); n < 0 yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0f;
+	/* |n| is formed in unsigned arithmetic: a plain -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) acc *= x;
+		if (u > 1UL) x *= x;	/* no squaring after the last bit, its result is never used */
+	}
+	return acc;
+}
+/* x**n by repeated squaring (double precision); n < 0 yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: a plain -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) acc *= x;
+		if (u > 1UL) x *= x;	/* no squaring after the last bit, its result is never used */
+	}
+	return acc;
+}
+/* x**n by repeated squaring (single-precision complex); n < 0 yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: a plain -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) acc *= x;
+		if (u > 1UL) x *= x;	/* no squaring after the last bit, its result is never used */
+	}
+	return acc;
+}
+/* x**n by repeated squaring (double-precision complex); n < 0 yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: a plain -n overflows (UB) for the most negative n. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) acc *= x;
+		if (u > 1UL) x *= x;	/* no squaring after the last bit, its result is never used */
+	}
+	return acc;
+}
+/* Integer power x**n: n == 0 or x == 1 gives 1; for n < 0 the result is 0 unless x is 0 or -1. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* x == 0 with n < 0 still evaluates 1/x, exactly as before (libf2c's pow_ii does the same). */
+		if (x != -1) return x == 0 ? 1/x : 0;
+	}
+	/* Only x == -1 arrives here with n < 0; |n| is taken in unsigned arithmetic because -n overflows (UB) for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u > 1UL) x *= x;	/* no squaring after the last bit, its result is never used */
+	}
+	return result;
+}
+/* Position (counted from 1 at index s) of the first largest entry among w(s..e), 1-based bounds; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	integer best = s, j;
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > w[best-1]) best = j;
+	return best - s + 1;
+}
+/* Position (counted from 1 at index s) of the first largest entry among w(s..e), 1-based bounds; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	integer best = s, j;
+	for (j = s + 1; j <= e; ++j)
+		if (w[j-1] > w[best-1]) best = j;
+	return best - s + 1;
+}
+/* *z = sum over i of conjg(x_i) * y_i for n single-precision complex elements with strides *incx_, *incy_. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so the walk stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+/* *z = sum over i of conjg(x_i) * y_i for n double-precision complex elements with strides *incx_, *incy_. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so the walk stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+/* *z = sum over i of x_i * y_i (no conjugation) for n single-precision complex elements with strides *incx_, *incy_. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so the walk stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+/* *z = sum over i of x_i * y_i (no conjugation) for n double-precision complex elements with strides *incx_, *incy_. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so the walk stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b DGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. 
+*/
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEHD2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgehd2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgehd2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgehd2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            IHI, ILO, INFO, LDA, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEHD2 reduces a real general matrix A to upper Hessenberg form H by */
+/* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* > */
+/* >          It is assumed that A is already upper triangular in rows */
+/* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
+/* >          set by a previous call to DGEBAL; otherwise they should be */
+/* >          set to 1 and N respectively. See Further Details. */
+/* >          1 <= ILO <= IHI <= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the n by n general matrix to be reduced. */
+/* >          On exit, the upper triangle and the first subdiagonal of A */
+/* >          are overwritten with the upper Hessenberg matrix H, and the */
+/* >          elements below the first subdiagonal, with the array TAU, */
+/* >          represent the orthogonal matrix Q as a product of elementary */
+/* >          reflectors. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (N-1) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
+/* >  reflectors */
+/* > */
+/* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
+/* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
+/* > */
+/* >  The contents of A are illustrated by the following example, with */
+/* >  n = 7, ilo = 2 and ihi = 6: */
+/* > */
+/* >  on entry,                        on exit, */
+/* > */
+/* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
+/* >  (                         a )    (                          a ) */
+/* > */
+/* >  where a denotes an element of the original matrix A, h denotes a */
+/* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
+/* >  element of the vector defining H(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, 
+	doublereal *a, integer *lda, doublereal *tau, doublereal *work, 
+	integer *info)
+{
+    /* Leading dimension of A, and the offset that makes a[i + j*ld] address A(i,j) */
+    integer ld, shift;
+
+    /* Integer temporaries that are handed to the callees by address */
+    integer neg_info, last_col, vec_len, tail_row, ncols;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *), dlarfg_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+
+    /* Current column, its subdiagonal element A(col+1,col), and that element's saved value */
+    integer col;
+    doublereal *pivot;
+    doublereal saved;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Shift A, TAU and WORK so that they are indexed from 1, as in the Fortran source */
+    ld = *lda;
+    shift = 1 + ld;
+    a -= shift;
+    --tau;
+    --work;
+
+    /* Check the arguments; INFO = -k reports the first illegal argument k */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
+	*info = -2;
+    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	neg_info = -(*info);
+	xerbla_("DGEHD2", &neg_info, (ftnlen)6);
+	return 0;
+    }
+
+    last_col = *ihi - 1;
+    for (col = *ilo; col <= last_col; ++col) {
+	pivot = &a[col + 1 + col * ld];
+
+	/* Generate the reflector H(col) that annihilates A(col+2:ihi,col) */
+	vec_len = *ihi - col;
+	tail_row = f2cmin(col + 2,*n);
+	dlarfg_(&vec_len, pivot, &a[tail_row + col * ld], &c__1, &tau[col]);
+
+	/* Keep the value DLARFG left in A(col+1,col) and store 1 there while H(col) is applied */
+	saved = *pivot;
+	*pivot = 1.;
+
+	/* A(1:ihi,col+1:ihi) := A(1:ihi,col+1:ihi) * H(col) */
+	vec_len = *ihi - col;
+	dlarf_("Right", ihi, &vec_len, pivot, &c__1, &tau[col], 
+		&a[(col + 1) * ld + 1], lda, &work[1]);
+
+	/* A(col+1:ihi,col+1:n) := H(col) * A(col+1:ihi,col+1:n) */
+	vec_len = *ihi - col;
+	ncols = *n - col;
+	dlarf_("Left", &vec_len, &ncols, pivot, &c__1, &tau[col], 
+		&a[col + 1 + (col + 1) * ld], lda, &work[1]);
+
+	/* Put the saved value back into A(col+1,col) */
+	*pivot = saved;
+    }
+
+    return 0;
+
+/*     End of DGEHD2 */
+
+} /* dgehd2_ */
+
diff --git a/lapack-netlib/SRC/dgehrd.c b/lapack-netlib/SRC/dgehrd.c
new file mode 100644
index 000000000..aff1f6691
--- /dev/null
+++ b/lapack-netlib/SRC/dgehrd.c
@@ -0,0 +1,789 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of the f2c struct *z as a C99 float complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of the f2c struct *z as a C99 double complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same address, viewed as a C99 float complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same address, viewed as a C99 double complex */
+#define pCf(z) (*_pCf(z)) /* assignable C99 view of the f2c complex that z points to */
+#define pCd(z) (*_pCd(z)) /* assignable C99 view of the f2c doublecomplex that z points to */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* R = A**B; as in libf2c's pow_zz, B is a doublecomplex pointer like A, so it must go through Cd() too */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) ceil(w) /* plain expression (no braces) so it can be used as a value */
+#define myhuge_(w) HUGE_VAL /* argument is ignored */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) /* s and e are passed as pointers and dereferenced here */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x raised to the integer power n, in float arithmetic */
+	float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static double dpow_ui(double x, integer n) { /* returns x raised to the integer power n, in double arithmetic */
+	double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns complex x raised to the integer power n */
+	_Complex float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns complex x raised to the integer power n */
+	_Complex double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static integer pow_ii(integer x, integer n) { /* returns x raised to the power n in integer arithmetic */
+	integer pow; unsigned long int u; /* pow: result; u: exponent bits still to process */
+	if (n <= 0) { /* non-positive exponents are resolved here except for x == -1 */
+		if (n == 0 || x == 1) pow = 1; /* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* result is 0; NOTE: x == 0 evaluates the integer division 1/0 */
+		else n = -n; /* x == -1: switch to the positive exponent and let the loop below run */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* second test reduces to n != 0 && x == -1 */
+		u = n;
+		for(pow = 1; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the largest of w[s-1..e-1]; n is unused */
+{
+	double m; integer i, mi; /* m: largest value so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first of equal maxima */
+	return mi-s+1; /* 1-based position counted from s */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* position of the largest of w[s-1..e-1]; n is unused */
+{
+	float m; integer i, mi; /* m: largest value so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first of equal maxima */
+	return mi-s+1; /* 1-based position counted from s */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy], i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy], i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy], i = 0..n-1 (no conjugation) */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy], i = 0..n-1 (no conjugation) */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+static integer c__65 = 65;
+static doublereal c_b25 = -1.;
+static doublereal c_b26 = 1.;
+
+/* > \brief \b DGEHRD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEHRD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgehrd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgehrd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgehrd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            IHI, ILO, INFO, LDA, LWORK, N */
+/*       DOUBLE PRECISION  A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEHRD reduces a real general matrix A to upper Hessenberg form H by */
+/* > an orthogonal similarity transformation:  Q**T * A * Q = H . */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] ILO */
+/* > \verbatim */
+/* >          ILO is INTEGER; it is described together with IHI below. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IHI */
+/* > \verbatim */
+/* >          IHI is INTEGER */
+/* > */
+/* >          It is assumed that A is already upper triangular in rows */
+/* >          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
+/* >          set by a previous call to DGEBAL; otherwise they should be */
+/* >          set to 1 and N respectively. See Further Details. */
+/* >          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the N-by-N general matrix to be reduced. */
+/* >          On exit, the upper triangle and the first subdiagonal of A */
+/* >          are overwritten with the upper Hessenberg matrix H, and the */
+/* >          elements below the first subdiagonal, with the array TAU, */
+/* >          represent the orthogonal matrix Q as a product of elementary */
+/* >          reflectors. See Further Details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (N-1) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
+/* >          zero. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (LWORK) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The length of the array WORK.  LWORK >= max(1,N). */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of (ihi-ilo) elementary */
+/* >  reflectors */
+/* > */
+/* >     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
+/* >  exit in A(i+2:ihi,i), and tau in TAU(i). */
+/* > */
+/* >  The contents of A are illustrated by the following example, with */
+/* >  n = 7, ilo = 2 and ihi = 6: */
+/* > */
+/* >  on entry,                        on exit, */
+/* > */
+/* >  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
+/* >  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
+/* >  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
+/* >  (                         a )    (                          a ) */
+/* > */
+/* >  where a denotes an element of the original matrix A, h denotes a */
+/* >  modified element of the upper Hessenberg matrix H, and vi denotes an */
+/* >  element of the vector defining H(i). */
+/* > */
+/* >  This file is a slight modification of LAPACK-3.0's DGEHRD */
+/* >  subroutine incorporating improvements proposed by Quintana-Orti and */
+/* >  Van de Geijn (2006). (See DLAHR2.) */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, 
+	doublereal *a, integer *lda, doublereal *tau, doublereal *work, 
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, j;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer nbmin, iinfo;
+    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *), daxpy_(
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *), dgehd2_(integer *, integer *, integer *, doublereal *,
+	     integer *, doublereal *, doublereal *, integer *), dlahr2_(
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *);
+    integer ib;
+    doublereal ei;
+    integer nb, nh;
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer nx;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iwt;
+
+/*  Blocked Hessenberg reduction: panels of up to nb columns go through dlahr2_ and are */
+/*  applied with dgemm_/dtrmm_/daxpy_/dlarfb_; dgehd2_ then reduces the remaining columns. */
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments: shift the pointers so a, tau and work take 1-based indices */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1; /* lwork == -1 requests a workspace-size query only */
+    if (*n < 0) {
+	*info = -1;
+    } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) {
+	*info = -2;
+    } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -8;
+    }
+
+    if (*info == 0) {
+
+/*        Compute the workspace requirements */
+
+/* Computing MIN */
+	i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nb = f2cmin(i__1,i__2);
+	lwkopt = *n * nb + 4160; /* n*nb panel plus 4160 = 65*64 entries addressed from work[iwt] with leading dimension 65 */
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEHRD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	tau[i__] = 0.;
+/* L10: */
+    }
+    i__1 = *n - 1;
+    for (i__ = f2cmax(1,*ihi); i__ <= i__1; ++i__) {
+	tau[i__] = 0.;
+/* L20: */
+    }
+
+/*     Quick return if possible */
+
+    nh = *ihi - *ilo + 1;
+    if (nh <= 1) {
+	work[1] = 1.; /* nothing to reduce: report a workspace size of 1 */
+	return 0;
+    }
+
+/*     Determine the block size */
+
+/* Computing MIN */
+    i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = f2cmin(i__1,i__2);
+    nbmin = 2;
+    if (nb > 1 && nb < nh) {
+
+/*        Determine when to cross over from blocked to unblocked code */
+/*        (last block is always handled by unblocked code) */
+
+/* Computing MAX */
+	i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < nh) {
+
+/*           Determine if workspace is large enough for blocked code */
+
+	    if (*lwork < *n * nb + 4160) {
+
+/*              Not enough workspace to use optimal NB:  determine the */
+/*              minimum value of NB, and reduce NB or force use of */
+/*              unblocked code */
+
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+		if (*lwork >= *n * nbmin + 4160) {
+		    nb = (*lwork - 4160) / *n; /* largest nb whose n*nb panel fits beside the 4160-entry block */
+		} else {
+		    nb = 1; /* below nbmin (>= 2), so the unblocked branch below is taken */
+		}
+	    }
+	}
+    }
+    ldwork = *n; /* leading dimension of the panel stored at work[1] */
+
+    if (nb < nbmin || nb >= nh) {
+
+/*        Use unblocked code below */
+
+	i__ = *ilo; /* dgehd2_ at the end then starts from column ilo */
+
+    } else {
+
+/*        Use blocked code */
+
+	iwt = *n * nb + 1; /* 1-based start, inside work, of the block passed with leading dimension 65 */
+	i__1 = *ihi - 1 - nx;
+	i__2 = nb;
+	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* f2c DO-loop form; the step nb is >= nbmin >= 2 here */
+/* Computing MIN */
+	    i__3 = nb, i__4 = *ihi - i__;
+	    ib = f2cmin(i__3,i__4);
+
+/*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
+/*           matrices V and T of the block reflector H = I - V*T*V**T */
+/*           which performs the reduction, and also the matrix Y = A*V*T */
+
+	    dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], &
+		    work[iwt], &c__65, &work[1], &ldwork);
+
+/*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
+/*           right, computing  A := A - Y * V**T. V(i+ib,ib-1) must be set */
+/*           to 1 */
+
+	    ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; /* saved so the entry can be restored after dgemm_ */
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.;
+	    i__3 = *ihi - i__ - ib + 1;
+	    dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, &
+		    work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
+		    c_b26, &a[(i__ + ib) * a_dim1 + 1], lda);
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;
+
+/*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
+/*           right */
+
+	    i__3 = ib - 1;
+	    dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26,
+		     &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork);
+	    i__3 = ib - 2;
+	    for (j = 0; j <= i__3; ++j) { /* subtract panel column j of work from column i+j+1 of A (rows 1:i) */
+		daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + 
+			j + 1) * a_dim1 + 1], &c__1);
+/* L30: */
+	    }
+
+/*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
+/*           left */
+
+	    i__3 = *ihi - i__;
+	    i__4 = *n - i__ - ib + 1;
+	    dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+		    i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], &
+		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
+		    ldwork);
+/* L40: */
+	}
+    }
+
+/*     Use unblocked code to reduce the rest of the matrix */
+
+    dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); /* iinfo is not inspected afterwards */
+    work[1] = (doublereal) lwkopt; /* report the optimal workspace size to the caller */
+
+    return 0;
+
+/*     End of DGEHRD */
+
+} /* dgehrd_ */
+
diff --git a/lapack-netlib/SRC/dgejsv.c b/lapack-netlib/SRC/dgejsv.c
new file mode 100644
index 000000000..6ba003ef9
--- /dev/null
+++ b/lapack-netlib/SRC/dgejsv.c
@@ -0,0 +1,2682 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of an f2c complex as a C99 _Complex float */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of an f2c doublecomplex as a C99 _Complex double */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage viewed as _Complex float; relies on the {r,i} struct sharing that layout */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same storage viewed as _Complex double; relies on the {r,i} struct sharing that layout */
+#define pCf(z) (*_pCf(z)) /* assignable form: pCf(p) = value stores into *p */
+#define pCd(z) (*_pCd(z)) /* assignable form: pCd(p) = value stores into *p */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* *R = (*A)**(*B); R, A, B are doublecomplex pointers */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle_() continue;
+#define myceiling_(w) ceil(w)
+#define myhuge_(w) HUGE_VAL
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x raised to the integer power n, in float arithmetic */
+	float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static double dpow_ui(double x, integer n) { /* returns x raised to the integer power n, in double arithmetic */
+	double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns complex x raised to the integer power n */
+	_Complex float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns complex x raised to the integer power n */
+	_Complex double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to process */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with the negated exponent */
+		for(u = n; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow; /* stays 1.0 when n == 0 */
+}
+static integer pow_ii(integer x, integer n) { /* returns x raised to the power n in integer arithmetic */
+	integer pow; unsigned long int u; /* pow: result; u: exponent bits still to process */
+	if (n <= 0) { /* non-positive exponents are resolved here except for x == -1 */
+		if (n == 0 || x == 1) pow = 1; /* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* result is 0; NOTE: x == 0 evaluates the integer division 1/0 */
+		else n = -n; /* x == -1: switch to the positive exponent and let the loop below run */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* second test reduces to n != 0 && x == -1 */
+		u = n;
+		for(pow = 1; ; ) { /* square-and-multiply, lowest exponent bit first */
+			if(u & 01) pow *= x; /* bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* more bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the largest of w[s-1..e-1]; n is unused */
+{
+	double m; integer i, mi; /* m: largest value so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first of equal maxima */
+	return mi-s+1; /* 1-based position counted from s */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* position of the largest of w[s-1..e-1]; n is unused */
+{
+	float m; integer i, mi; /* m: largest value so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first of equal maxima */
+	return mi-s+1; /* 1-based position counted from s */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy], i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy], i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy], i = 0..n-1 (no conjugation) */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through the first argument */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy], i = 0..n-1 (no conjugation) */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* general strides */
+		for (i=0;i<n;i++) { /* NOTE(review): a negative increment indexes before element 0 here -- confirm callers never pass one */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through the first argument */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublereal c_b34 = 0.;
+static doublereal c_b35 = 1.;
+static integer c__0 = 0;
+static integer c_n1 = -1;
+
+/* > \brief \b DGEJSV */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEJSV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgejsv.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgejsv.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgejsv.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, */
+/*                          M, N, A, LDA, SVA, U, LDU, V, LDV, */
+/*                          WORK, LWORK, IWORK, INFO ) */
+
+/*       IMPLICIT    NONE */
+/*       INTEGER     INFO, LDA, LDU, LDV, LWORK, M, N */
+/*       DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), */
+/*      $            WORK( LWORK ) */
+/*       INTEGER     IWORK( * ) */
+/*       CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEJSV computes the singular value decomposition (SVD) of a real M-by-N */
+/* > matrix [A], where M >= N. The SVD of [A] is written as */
+/* > */
+/* >              [A] = [U] * [SIGMA] * [V]^t, */
+/* > */
+/* > where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N */
+/* > diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and */
+/* > [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are */
+/* > the singular values of [A]. The columns of [U] and [V] are the left and */
+/* > the right singular vectors of [A], respectively. The matrices [U] and [V] */
+/* > are computed and stored in the arrays U and V, respectively. The diagonal */
+/* > of [SIGMA] is computed and stored in the array SVA. */
+/* > DGEJSV can sometimes compute tiny singular values and their singular vectors much */
+/* > more accurately than other SVD routines, see below under Further Details. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBA */
+/* > \verbatim */
+/* >          JOBA is CHARACTER*1 */
+/* >        Specifies the level of accuracy: */
+/* >       = 'C': This option works well (high relative accuracy) if A = B * D, */
+/* >             with well-conditioned B and arbitrary diagonal matrix D. */
+/* >             The accuracy cannot be spoiled by COLUMN scaling. The */
+/* >             accuracy of the computed output depends on the condition of */
+/* >             B, and the procedure aims at the best theoretical accuracy. */
+/* >             The relative error max_{i=1:N}|d sigma_i| / sigma_i is */
+/* >             bounded by f(M,N)*epsilon* cond(B), independent of D. */
+/* >             The input matrix is preprocessed with the QRF with column */
+/* >             pivoting. This initial preprocessing and preconditioning by */
+/* >             a rank revealing QR factorization is common for all values of */
+/* >             JOBA. Additional actions are specified as follows: */
+/* >       = 'E': Computation as with 'C' with an additional estimate of the */
+/* >             condition number of B. It provides a realistic error bound. */
+/* >       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings */
+/* >             D1, D2, and well-conditioned matrix C, this option gives */
+/* >             higher accuracy than the 'C' option. If the structure of the */
+/* >             input matrix is not known, and relative accuracy is */
+/* >             desirable, then this option is advisable. The input matrix A */
+/* >             is preprocessed with QR factorization with FULL (row and */
+/* >             column) pivoting. */
+/* >       = 'G': Computation as with 'F' with an additional estimate of the */
+/* >             condition number of B, where A=D*B. If A has heavily weighted */
+/* >             rows, then using this condition number gives too pessimistic */
+/* >             error bound. */
+/* >       = 'A': Small singular values are the noise and the matrix is treated */
+/* >             as numerically rank deficient. The error in the computed */
+/* >             singular values is bounded by f(m,n)*epsilon*||A||. */
+/* >             The computed SVD A = U * S * V^t restores A up to */
+/* >             f(m,n)*epsilon*||A||. */
+/* >             This gives the procedure the licence to discard (set to zero) */
+/* >             all singular values below N*epsilon*||A||. */
+/* >       = 'R': Similar as in 'A'. Rank revealing property of the initial */
+/* >             QR factorization is used to reveal (using triangular factor) */
+/* >             a gap sigma_{r+1} < epsilon * sigma_r in which case the */
+/* >             numerical RANK is declared to be r. The SVD is computed with */
+/* >             absolute error bounds, but more accurately than with 'A'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >        Specifies whether to compute the columns of U: */
+/* >       = 'U': N columns of U are returned in the array U. */
+/* >       = 'F': full set of M left sing. vectors is returned in the array U. */
+/* >       = 'W': U may be used as workspace of length M*N. See the description */
+/* >             of U. */
+/* >       = 'N': U is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >        Specifies whether to compute the matrix V: */
+/* >       = 'V': N columns of V are returned in the array V; Jacobi rotations */
+/* >             are not explicitly accumulated. */
+/* >       = 'J': N columns of V are returned in the array V, but they are */
+/* >             computed as the product of Jacobi rotations. This option is */
+/* >             allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. */
+/* >       = 'W': V may be used as workspace of length N*N. See the description */
+/* >             of V. */
+/* >       = 'N': V is not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBR */
+/* > \verbatim */
+/* >          JOBR is CHARACTER*1 */
+/* >        Specifies the RANGE for the singular values. Issues the licence to */
+/* >        set to zero small positive singular values if they are outside */
+/* >        specified range. If A .NE. 0 is scaled so that the largest singular */
+/* >        value of c*A is around DSQRT(BIG), BIG=DLAMCH('O'), then JOBR issues */
+/* >        the licence to kill columns of A whose norm in c*A is less than */
+/* >        DSQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN, */
+/* >        where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E'). */
+/* >       = 'N': Do not kill small columns of c*A. This option assumes that */
+/* >             BLAS and QR factorizations and triangular solvers are */
+/* >             implemented to work in that range. If the condition of A */
+/* >             is greater than BIG, use DGESVJ. */
+/* >       = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)] */
+/* >             (roughly, as described above). This option is recommended. */
+/* >                                            ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
+/* >        For computing the singular values in the FULL range [SFMIN,BIG] */
+/* >        use DGESVJ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBT */
+/* > \verbatim */
+/* >          JOBT is CHARACTER*1 */
+/* >        If the matrix is square then the procedure may determine to use */
+/* >        transposed A if A^t seems to be better with respect to convergence. */
+/* >        If the matrix is not square, JOBT is ignored. This is subject to */
+/* >        changes in the future. */
+/* >        The decision is based on two values of entropy over the adjoint */
+/* >        orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). */
+/* >       = 'T': transpose if entropy test indicates possibly faster */
+/* >        convergence of Jacobi process if A^t is taken as input. If A is */
+/* >        replaced with A^t, then the row pivoting is included automatically. */
+/* >       = 'N': do not speculate. */
+/* >        This option can be used to compute only the singular values, or the */
+/* >        full SVD (U, SIGMA and V). For only one set of singular vectors */
+/* >        (U or V), the caller should provide both U and V, as one of the */
+/* >        matrices is used as workspace if the matrix A is transposed. */
+/* >        The implementer can easily remove this constraint and make the */
+/* >        code more complicated. See the descriptions of U and V. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBP */
+/* > \verbatim */
+/* >          JOBP is CHARACTER*1 */
+/* >        Issues the licence to introduce structured perturbations to drown */
+/* >        denormalized numbers. This licence should be active if the */
+/* >        denormals are poorly implemented, causing slow computation, */
+/* >        especially in cases of fast convergence (!). For details see [1,2]. */
+/* >        For the sake of simplicity, these perturbations are included only */
+/* >        when the full SVD or only the singular values are requested. The */
+/* >        implementer/user can easily add the perturbation for the cases of */
+/* >        computing one set of singular vectors. */
+/* >       = 'P': introduce perturbation */
+/* >       = 'N': do not perturb */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >         The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >         The number of columns of the input matrix A. M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SVA */
+/* > \verbatim */
+/* >          SVA is DOUBLE PRECISION array, dimension (N) */
+/* >          On exit, */
+/* >          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the */
+/* >            computation SVA contains Euclidean column norms of the */
+/* >            iterated matrices in the array A. */
+/* >          - For WORK(1) .NE. WORK(2): The singular values of A are */
+/* >            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if */
+/* >            sigma_max(A) overflows or if small singular values have been */
+/* >            saved from underflow by scaling the input matrix A. */
+/* >          - If JOBR='R' then some of the singular values may be returned */
+/* >            as exact zeros obtained by "set to zero" because they are */
+/* >            below the numerical rank threshold or are denormalized numbers. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is DOUBLE PRECISION array, dimension ( LDU, N ) */
+/* >          If JOBU = 'U', then U contains on exit the M-by-N matrix of */
+/* >                         the left singular vectors. */
+/* >          If JOBU = 'F', then U contains on exit the M-by-M matrix of */
+/* >                         the left singular vectors, including an ONB */
+/* >                         of the orthogonal complement of the Range(A). */
+/* >          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N), */
+/* >                         then U is used as workspace if the procedure */
+/* >                         replaces A with A^t. In that case, [V] is computed */
+/* >                         in U as left singular vectors of A^t and then */
+/* >                         copied back to the V array. This 'W' option is just */
+/* >                         a reminder to the caller that in this case U is */
+/* >                         reserved as workspace of length N*N. */
+/* >          If JOBU = 'N'  U is not referenced, unless JOBT='T'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U,  LDU >= 1. */
+/* >          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] V */
+/* > \verbatim */
+/* >          V is DOUBLE PRECISION array, dimension ( LDV, N ) */
+/* >          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of */
+/* >                         the right singular vectors; */
+/* >          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N), */
+/* >                         then V is used as workspace if the procedure */
+/* >                         replaces A with A^t. In that case, [U] is computed */
+/* >                         in V as right singular vectors of A^t and then */
+/* >                         copied back to the U array. This 'W' option is just */
+/* >                         a reminder to the caller that in this case V is */
+/* >                         reserved as workspace of length N*N. */
+/* >          If JOBV = 'N'  V is not referenced, unless JOBT='T'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V,  LDV >= 1. */
+/* >          If JOBV = 'V' or 'J' or 'W', then LDV >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (LWORK) */
+/* >          On exit, if N > 0 .AND. M > 0 (else not referenced), */
+/* >          WORK(1) = numerator of the scaling factor SCALE = WORK(1) / WORK(2), */
+/* >                    such that SCALE*SVA(1:N) are the computed singular values */
+/* >                    of A. (See the description of SVA().) */
+/* >          WORK(2) = See the description of WORK(1). */
+/* >          WORK(3) = SCONDA is an estimate for the condition number of */
+/* >                    column equilibrated A. (If JOBA = 'E' or 'G') */
+/* >                    SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */
+/* >                    It is computed using DPOCON. It holds */
+/* >                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */
+/* >                    where R is the triangular factor from the QRF of A. */
+/* >                    However, if R is truncated and the numerical rank is */
+/* >                    determined to be strictly smaller than N, SCONDA is */
+/* >                    returned as -1, thus indicating that the smallest */
+/* >                    singular values might be lost. */
+/* > */
+/* >          If full SVD is needed, the following two condition numbers are */
+/* >          useful for the analysis of the algorithm. They are provided for */
+/* >          a developer/implementer who is familiar with the details of */
+/* >          the method. */
+/* > */
+/* >          WORK(4) = an estimate of the scaled condition number of the */
+/* >                    triangular factor in the first QR factorization. */
+/* >          WORK(5) = an estimate of the scaled condition number of the */
+/* >                    triangular factor in the second QR factorization. */
+/* >          The following two parameters are computed if JOBT = 'T'. */
+/* >          They are provided for a developer/implementer who is familiar */
+/* >          with the details of the method. */
+/* > */
+/* >          WORK(6) = the entropy of A^t*A :: this is the Shannon entropy */
+/* >                    of diag(A^t*A) / Trace(A^t*A) taken as point in the */
+/* >                    probability simplex. */
+/* >          WORK(7) = the entropy of A*A^t. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          Length of WORK to confirm proper allocation of work space. */
+/* >          LWORK depends on the job: */
+/* > */
+/* >          If only SIGMA is needed (JOBU = 'N', JOBV = 'N') and */
+/* >            -> .. no scaled condition estimate required (JOBA is neither 'E' nor 'G'): */
+/* >               LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement. */
+/* >               ->> For optimal performance (blocked code) the optimal value */
+/* >               is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal */
+/* >               block size for DGEQP3 and DGEQRF. */
+/* >               In general, optimal LWORK is computed as */
+/* >               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). */
+/* >            -> .. an estimate of the scaled condition number of A is */
+/* >               required (JOBA='E', 'G'). In this case, LWORK is the maximum */
+/* >               of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7). */
+/* >               ->> For optimal performance (blocked code) the optimal value */
+/* >               is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). */
+/* >               In general, the optimal length LWORK is computed as */
+/* >               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), */
+/* >                                                     N+N*N+LWORK(DPOCON),7). */
+/* > */
+/* >          If SIGMA and the right singular vectors are needed (JOBV = 'V'), */
+/* >            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). */
+/* >            -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7), */
+/* >               where NB is the optimal block size for DGEQP3, DGEQRF, DGELQF, */
+/* >               DORMLQ. In general, the optimal length LWORK is computed as */
+/* >               LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), */
+/* >                       N+LWORK(DGELQF), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). */
+/* > */
+/* >          If SIGMA and the left singular vectors are needed */
+/* >            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). */
+/* >            -> For optimal performance: */
+/* >               if JOBU = 'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7), */
+/* >               if JOBU = 'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7), */
+/* >               where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. */
+/* >               In general, the optimal length LWORK is computed as */
+/* >               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), */
+/* >                        2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). */
+/* >               Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or */
+/* >               M*NB (for JOBU = 'F'). */
+/* > */
+/* >          If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and */
+/* >            -> if JOBV = 'V' */
+/* >               the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N). */
+/* >            -> if JOBV = 'J' the minimal requirement is */
+/* >               LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6). */
+/* >            -> For optimal performance, LWORK should be additionally */
+/* >               larger than N+M*NB, where NB is the optimal block size */
+/* >               for DORMQR. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (M+3*N). */
+/* >          On exit, */
+/* >          IWORK(1) = the numerical rank determined after the initial */
+/* >                     QR factorization with pivoting. See the descriptions */
+/* >                     of JOBA and JOBR. */
+/* >          IWORK(2) = the number of the computed nonzero singular values */
+/* >          IWORK(3) = if nonzero, a warning message: */
+/* >                     If IWORK(3) = 1 then some of the column norms of A */
+/* >                     were denormalized floats. The requested high accuracy */
+/* >                     is not warranted by the data. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >           < 0:  if INFO = -i, then the i-th argument had an illegal value. */
+/* >           = 0:  successful exit; */
+/* >           > 0:  DGEJSV  did not converge in the maximal allowed number */
+/* >                 of sweeps. The computed values may be inaccurate. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleGEsing */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3, */
+/* >  DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an */
+/* >  additional row pivoting can be used as a preprocessor, which in some */
+/* >  cases results in much higher accuracy. An example is matrix A with the */
+/* >  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned */
+/* >  diagonal matrices and C is a well-conditioned matrix. In that case, complete */
+/* >  pivoting in the first QR factorizations provides accuracy dependent on the */
+/* >  condition number of C, and independent of D1, D2. Such higher accuracy is */
+/* >  not completely understood theoretically, but it works well in practice. */
+/* >  Further, if A can be written as A = B*D, with well-conditioned B and some */
+/* >  diagonal D, then the high accuracy is guaranteed, both theoretically and */
+/* >  in software, independent of D. For more details see [1], [2]. */
+/* >     The computational range for the singular values can be the full range */
+/* >  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS */
+/* >  & LAPACK routines called by DGEJSV are implemented to work in that range. */
+/* >  If that is not the case, then the restriction for safe computation with */
+/* >  the singular values in the range of normalized IEEE numbers is that the */
+/* >  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not */
+/* >  overflow. This code (DGEJSV) is best used in this restricted range, */
+/* >  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are */
+/* >  returned as zeros. See JOBR for details on this. */
+/* >     Further, this implementation is somewhat slower than the one described */
+/* >  in [1,2] due to replacement of some non-LAPACK components, and because */
+/* >  the choice of some tuning parameters in the iterative part (DGESVJ) is */
+/* >  left to the implementer on a particular machine. */
+/* >     The rank revealing QR factorization (in this code: DGEQP3) should be */
+/* >  implemented as in [3]. We have a new version of DGEQP3 under development */
+/* >  that is more robust than the current one in LAPACK, with a cleaner cut in */
+/* >  rank deficient cases. It will be available in the SIGMA library [4]. */
+/* >  If M is much larger than N, it is obvious that the initial QRF with */
+/* >  column pivoting can be preprocessed by the QRF without pivoting. That */
+/* >  well known trick is not used in DGEJSV because in some cases heavy row */
+/* >  weighting can be treated with complete pivoting. The overhead in cases */
+/* >  M much larger than N is then only due to pivoting, but the benefits in */
+/* >  terms of accuracy have prevailed. The implementer/user can incorporate */
+/* >  this extra QRF step easily. The implementer can also improve data movement */
+/* >  (matrix transpose, matrix copy, matrix transposed copy) - this */
+/* >  implementation of DGEJSV uses only the simplest, naive data movement. */
+/* > \endverbatim */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */
+
+/* > \par References: */
+/*  ================ */
+/* > */
+/* > \verbatim */
+/* > */
+/* > [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */
+/* >     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */
+/* >     LAPACK Working note 169. */
+/* > [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */
+/* >     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */
+/* >     LAPACK Working note 170. */
+/* > [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR */
+/* >     factorization software - a case study. */
+/* >     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. */
+/* >     LAPACK Working note 176. */
+/* > [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */
+/* >     QSVD, (H,K)-SVD computations. */
+/* >     Department of Mathematics, University of Zagreb, 2008. */
+/* > \endverbatim */
+
+/* >  \par Bugs, examples and comments: */
+/*   ================================= */
+/* > */
+/* >  Please report all bugs and send interesting examples and/or comments to */
+/* >  drmac@math.hr. Thank you. */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgejsv_(char *joba, char *jobu, char *jobv, char *jobr, 
+	char *jobt, char *jobp, integer *m, integer *n, doublereal *a, 
+	integer *lda, doublereal *sva, doublereal *u, integer *ldu, 
+	doublereal *v, integer *ldv, doublereal *work, integer *lwork, 
+	integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, 
+	    i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11, i__12;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Local variables */
+    logical defr;
+    doublereal aapp, aaqq;
+    logical kill;
+    integer ierr;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    doublereal temp1;
+    integer p, q;
+    logical jracc;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    doublereal small, entra, sfmin;
+    logical lsvec;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *), dswap_(integer *, doublereal *, integer 
+	    *, doublereal *, integer *);
+    doublereal epsln;
+    logical rsvec;
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer n1;
+    logical l2aber;
+    extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *);
+    doublereal condr1, condr2, uscal1, uscal2;
+    logical l2kill, l2rank, l2tran, l2pert;
+    extern doublereal dlamch_(char *);
+    integer nr;
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    doublereal scalem;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    doublereal sconda;
+    logical goscal;
+    doublereal aatmin;
+    extern /* Subroutine */ int dgeqrf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *);
+    doublereal aatmax;
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *), 
+	    dlaset_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
+    logical noscal;
+    extern /* Subroutine */ int dpocon_(char *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, integer *, 
+	    integer *), dgesvj_(char *, char *, char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dlassq_(integer *, doublereal *, integer 
+	    *, doublereal *, doublereal *), dlaswp_(integer *, doublereal *, 
+	    integer *, integer *, integer *, integer *, integer *);
+    doublereal entrat;
+    logical almort;
+    extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dormlq_(char *, char *, integer *, integer *, integer 
+	    *, doublereal *, integer *, doublereal *, doublereal *, integer *,
+	     doublereal *, integer *, integer *);
+    doublereal maxprj;
+    logical errest;
+    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+    logical transp, rowpiv;
+    doublereal big, cond_ok__, xsc, big1;
+    integer warning, numrank;
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  =========================================================================== */
+
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    --sva;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    lsvec = lsame_(jobu, "U") || lsame_(jobu, "F");
+    jracc = lsame_(jobv, "J");
+    rsvec = lsame_(jobv, "V") || jracc;
+    rowpiv = lsame_(joba, "F") || lsame_(joba, "G");
+    l2rank = lsame_(joba, "R");
+    l2aber = lsame_(joba, "A");
+    errest = lsame_(joba, "E") || lsame_(joba, "G");
+    l2tran = lsame_(jobt, "T");
+    l2kill = lsame_(jobr, "R");
+    defr = lsame_(jobr, "N");
+    l2pert = lsame_(jobp, "P");
+
+    if (! (rowpiv || l2rank || l2aber || errest || lsame_(joba, "C"))) {
+	*info = -1;
+    } else if (! (lsvec || lsame_(jobu, "N") || lsame_(
+	    jobu, "W"))) {
+	*info = -2;
+    } else if (! (rsvec || lsame_(jobv, "N") || lsame_(
+	    jobv, "W")) || jracc && ! lsvec) {
+	*info = -3;
+    } else if (! (l2kill || defr)) {
+	*info = -4;
+    } else if (! (l2tran || lsame_(jobt, "N"))) {
+	*info = -5;
+    } else if (! (l2pert || lsame_(jobp, "N"))) {
+	*info = -6;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*n < 0 || *n > *m) {
+	*info = -8;
+    } else if (*lda < *m) {
+	*info = -10;
+    } else if (lsvec && *ldu < *m) {
+	*info = -13;
+    } else if (rsvec && *ldv < *n) {
+	*info = -15;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 7, i__2 = (*n << 2) + 1, i__1 = f2cmax(i__1,i__2), i__2 = (*m << 
+		1) + *n;
+/* Computing MAX */
+	i__3 = 7, i__4 = (*n << 2) + *n * *n, i__3 = f2cmax(i__3,i__4), i__4 = (*
+		m << 1) + *n;
+/* Computing MAX */
+	i__5 = 7, i__6 = (*m << 1) + *n, i__5 = f2cmax(i__5,i__6), i__6 = (*n << 
+		2) + 1;
+/* Computing MAX */
+	i__7 = 7, i__8 = (*m << 1) + *n, i__7 = f2cmax(i__7,i__8), i__8 = (*n << 
+		2) + 1;
+/* Computing MAX */
+	i__9 = (*m << 1) + *n, i__10 = *n * 6 + (*n << 1) * *n;
+/* Computing MAX */
+	i__11 = (*m << 1) + *n, i__12 = (*n << 2) + *n * *n, i__11 = f2cmax(
+		i__11,i__12), i__12 = (*n << 1) + *n * *n + 6;
+	if (! (lsvec || rsvec || errest) && *lwork < f2cmax(i__1,i__2) || ! (
+		lsvec || rsvec) && errest && *lwork < f2cmax(i__3,i__4) || lsvec 
+		&& ! rsvec && *lwork < f2cmax(i__5,i__6) || rsvec && ! lsvec && *
+		lwork < f2cmax(i__7,i__8) || lsvec && rsvec && ! jracc && *lwork 
+		< f2cmax(i__9,i__10) || lsvec && rsvec && jracc && *lwork < f2cmax(
+		i__11,i__12)) {
+	    *info = -17;
+	} else {
+/*        #:) */
+	    *info = 0;
+	}
+    }
+
+    if (*info != 0) {
+/*       #:( */
+	i__1 = -(*info);
+	xerbla_("DGEJSV", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return for void matrix (Y3K safe) */
+/* #:) */
+    if (*m == 0 || *n == 0) {
+	iwork[1] = 0;
+	iwork[2] = 0;
+	iwork[3] = 0;
+	work[1] = 0.;
+	work[2] = 0.;
+	work[3] = 0.;
+	work[4] = 0.;
+	work[5] = 0.;
+	work[6] = 0.;
+	work[7] = 0.;
+	return 0;
+    }
+
+/*     Determine whether the matrix U should be M x N or M x M */
+
+    if (lsvec) {
+	n1 = *n;
+	if (lsame_(jobu, "F")) {
+	    n1 = *m;
+	}
+    }
+
+/*     Set numerical parameters */
+
+/* !    NOTE: Make sure DLAMCH() does not fail on the target architecture. */
+
+    epsln = dlamch_("Epsilon");
+    sfmin = dlamch_("SafeMinimum");
+    small = sfmin / epsln;
+    big = dlamch_("O");
+/*     BIG   = ONE / SFMIN */
+
+/*     Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N */
+
+/* (!)  If necessary, scale SVA() to protect the largest norm from */
+/*     overflow. It is possible that this scaling pushes the smallest */
+/*     column norm left from the underflow threshold (extreme case). */
+
+    scalem = 1. / sqrt((doublereal) (*m) * (doublereal) (*n));
+    noscal = TRUE_;
+    goscal = TRUE_;
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	aapp = 0.;
+	aaqq = 1.;
+	dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
+	if (aapp > big) {
+	    *info = -9;
+	    i__2 = -(*info);
+	    xerbla_("DGEJSV", &i__2, (ftnlen)6);
+	    return 0;
+	}
+	aaqq = sqrt(aaqq);
+	if (aapp < big / aaqq && noscal) {
+	    sva[p] = aapp * aaqq;
+	} else {
+	    noscal = FALSE_;
+	    sva[p] = aapp * (aaqq * scalem);
+	    if (goscal) {
+		goscal = FALSE_;
+		i__2 = p - 1;
+		dscal_(&i__2, &scalem, &sva[1], &c__1);
+	    }
+	}
+/* L1874: */
+    }
+
+    if (noscal) {
+	scalem = 1.;
+    }
+
+    aapp = 0.;
+    aaqq = big;
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+/* Computing MAX */
+	d__1 = aapp, d__2 = sva[p];
+	aapp = f2cmax(d__1,d__2);
+	if (sva[p] != 0.) {
+/* Computing MIN */
+	    d__1 = aaqq, d__2 = sva[p];
+	    aaqq = f2cmin(d__1,d__2);
+	}
+/* L4781: */
+    }
+
+/*     Quick return for zero M x N matrix */
+/* #:) */
+    if (aapp == 0.) {
+	if (lsvec) {
+	    dlaset_("G", m, &n1, &c_b34, &c_b35, &u[u_offset], ldu)
+		    ;
+	}
+	if (rsvec) {
+	    dlaset_("G", n, n, &c_b34, &c_b35, &v[v_offset], ldv);
+	}
+	work[1] = 1.;
+	work[2] = 1.;
+	if (errest) {
+	    work[3] = 1.;
+	}
+	if (lsvec && rsvec) {
+	    work[4] = 1.;
+	    work[5] = 1.;
+	}
+	if (l2tran) {
+	    work[6] = 0.;
+	    work[7] = 0.;
+	}
+	iwork[1] = 0;
+	iwork[2] = 0;
+	iwork[3] = 0;
+	return 0;
+    }
+
+/*     Issue warning if denormalized column norms detected. Override the */
+/*     high relative accuracy request. Issue licence to kill columns */
+/*     (set them to zero) whose norm is less than sigma_max / BIG (roughly). */
+/* #:( */
+    warning = 0;
+    if (aaqq <= sfmin) {
+	l2rank = TRUE_;
+	l2kill = TRUE_;
+	warning = 1;
+    }
+
+/*     Quick return for one-column matrix */
+/* #:) */
+    if (*n == 1) {
+
+	if (lsvec) {
+	    dlascl_("G", &c__0, &c__0, &sva[1], &scalem, m, &c__1, &a[a_dim1 
+		    + 1], lda, &ierr);
+	    dlacpy_("A", m, &c__1, &a[a_offset], lda, &u[u_offset], ldu);
+/*           computing all M left singular vectors of the M x 1 matrix */
+	    if (n1 != *n) {
+		i__1 = *lwork - *n;
+		dgeqrf_(m, n, &u[u_offset], ldu, &work[1], &work[*n + 1], &
+			i__1, &ierr);
+		i__1 = *lwork - *n;
+		dorgqr_(m, &n1, &c__1, &u[u_offset], ldu, &work[1], &work[*n 
+			+ 1], &i__1, &ierr);
+		dcopy_(m, &a[a_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
+	    }
+	}
+	if (rsvec) {
+	    v[v_dim1 + 1] = 1.;
+	}
+	if (sva[1] < big * scalem) {
+	    sva[1] /= scalem;
+	    scalem = 1.;
+	}
+	work[1] = 1. / scalem;
+	work[2] = 1.;
+	if (sva[1] != 0.) {
+	    iwork[1] = 1;
+	    if (sva[1] / scalem >= sfmin) {
+		iwork[2] = 1;
+	    } else {
+		iwork[2] = 0;
+	    }
+	} else {
+	    iwork[1] = 0;
+	    iwork[2] = 0;
+	}
+	iwork[3] = 0;
+	if (errest) {
+	    work[3] = 1.;
+	}
+	if (lsvec && rsvec) {
+	    work[4] = 1.;
+	    work[5] = 1.;
+	}
+	if (l2tran) {
+	    work[6] = 0.;
+	    work[7] = 0.;
+	}
+	return 0;
+
+    }
+
+    transp = FALSE_;
+    l2tran = l2tran && *m == *n;
+
+    aatmax = -1.;
+    aatmin = big;
+    if (rowpiv || l2tran) {
+
+/*     Compute the row norms, needed to determine row pivoting sequence */
+/*     (in the case of heavily row weighted A, row pivoting is strongly */
+/*     advised) and to collect information needed to compare the */
+/*     structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). */
+
+	if (l2tran) {
+	    i__1 = *m;
+	    for (p = 1; p <= i__1; ++p) {
+		xsc = 0.;
+		temp1 = 1.;
+		dlassq_(n, &a[p + a_dim1], lda, &xsc, &temp1);
+/*              DLASSQ gets both the ell_2 and the ell_infinity norm */
+/*              in one pass through the vector */
+		work[*m + *n + p] = xsc * scalem;
+		work[*n + p] = xsc * (scalem * sqrt(temp1));
+/* Computing MAX */
+		d__1 = aatmax, d__2 = work[*n + p];
+		aatmax = f2cmax(d__1,d__2);
+		if (work[*n + p] != 0.) {
+/* Computing MIN */
+		    d__1 = aatmin, d__2 = work[*n + p];
+		    aatmin = f2cmin(d__1,d__2);
+		}
+/* L1950: */
+	    }
+	} else {
+	    i__1 = *m;
+	    for (p = 1; p <= i__1; ++p) {
+		work[*m + *n + p] = scalem * (d__1 = a[p + idamax_(n, &a[p + 
+			a_dim1], lda) * a_dim1], abs(d__1));
+/* Computing MAX */
+		d__1 = aatmax, d__2 = work[*m + *n + p];
+		aatmax = f2cmax(d__1,d__2);
+/* Computing MIN */
+		d__1 = aatmin, d__2 = work[*m + *n + p];
+		aatmin = f2cmin(d__1,d__2);
+/* L1904: */
+	    }
+	}
+
+    }
+
+/*     For square matrix A try to determine whether A^t  would be  better */
+/*     input for the preconditioned Jacobi SVD, with faster convergence. */
+/*     The decision is based on an O(N) function of the vector of column */
+/*     and row norms of A, based on the Shannon entropy. This should give */
+/*     the right choice in most cases when the difference actually matters. */
+/*     It may fail and pick the slower converging side. */
+
+    entra = 0.;
+    entrat = 0.;
+    if (l2tran) {
+
+	xsc = 0.;
+	temp1 = 1.;
+	dlassq_(n, &sva[1], &c__1, &xsc, &temp1);
+	temp1 = 1. / temp1;
+
+	entra = 0.;
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+/* Computing 2nd power */
+	    d__1 = sva[p] / xsc;
+	    big1 = d__1 * d__1 * temp1;
+	    if (big1 != 0.) {
+		entra += big1 * log(big1);
+	    }
+/* L1113: */
+	}
+	entra = -entra / log((doublereal) (*n));
+
+/*        Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. */
+/*        It is derived from the diagonal of  A^t * A.  Do the same with the */
+/*        diagonal of A * A^t, compute the entropy of the corresponding */
+/*        probability distribution. Note that A * A^t and A^t * A have the */
+/*        same trace. */
+
+	entrat = 0.;
+	i__1 = *n + *m;
+	for (p = *n + 1; p <= i__1; ++p) {
+/* Computing 2nd power */
+	    d__1 = work[p] / xsc;
+	    big1 = d__1 * d__1 * temp1;
+	    if (big1 != 0.) {
+		entrat += big1 * log(big1);
+	    }
+/* L1114: */
+	}
+	entrat = -entrat / log((doublereal) (*m));
+
+/*        Analyze the entropies and decide A or A^t. Smaller entropy */
+/*        usually means better input for the algorithm. */
+
+	transp = entrat < entra;
+
+/*        If A^t is better than A, transpose A. */
+
+	if (transp) {
+/*           In an optimal implementation, this trivial transpose */
+/*           should be replaced with faster transpose. */
+	    i__1 = *n - 1;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p + 1; q <= i__2; ++q) {
+		    temp1 = a[q + p * a_dim1];
+		    a[q + p * a_dim1] = a[p + q * a_dim1];
+		    a[p + q * a_dim1] = temp1;
+/* L1116: */
+		}
+/* L1115: */
+	    }
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		work[*m + *n + p] = sva[p];
+		sva[p] = work[*n + p];
+/* L1117: */
+	    }
+	    temp1 = aapp;
+	    aapp = aatmax;
+	    aatmax = temp1;
+	    temp1 = aaqq;
+	    aaqq = aatmin;
+	    aatmin = temp1;
+	    kill = lsvec;
+	    lsvec = rsvec;
+	    rsvec = kill;
+	    if (lsvec) {
+		n1 = *n;
+	    }
+
+	    rowpiv = TRUE_;
+	}
+
+    }
+/*     END IF L2TRAN */
+
+/*     Scale the matrix so that its maximal singular value remains less */
+/*     than DSQRT(BIG) -- the matrix is scaled so that its maximal column */
+/*     has Euclidean norm equal to DSQRT(BIG/N). The only reason to keep */
+/*     DSQRT(BIG) instead of BIG is the fact that DGEJSV uses LAPACK and */
+/*     BLAS routines that, in some implementations, are not capable of */
+/*     working in the full interval [SFMIN,BIG] and that they may provoke */
+/*     overflows in the intermediate results. If the singular values spread */
+/*     from SFMIN to BIG, then DGESVJ will compute them. So, in that case, */
+/*     one should use DGESVJ instead of DGEJSV. */
+
+    big1 = sqrt(big);
+    temp1 = sqrt(big / (doublereal) (*n));
+
+    dlascl_("G", &c__0, &c__0, &aapp, &temp1, n, &c__1, &sva[1], n, &ierr);
+    if (aaqq > aapp * sfmin) {
+	aaqq = aaqq / aapp * temp1;
+    } else {
+	aaqq = aaqq * temp1 / aapp;
+    }
+    temp1 *= scalem;
+    dlascl_("G", &c__0, &c__0, &aapp, &temp1, m, n, &a[a_offset], lda, &ierr);
+
+/*     To undo scaling at the end of this procedure, multiply the */
+/*     computed singular values with USCAL2 / USCAL1. */
+
+    uscal1 = temp1;
+    uscal2 = aapp;
+
+    if (l2kill) {
+/*        L2KILL enforces computation of nonzero singular values in */
+/*        the restricted range of condition number of the initial A, */
+/*        sigma_max(A) / sigma_min(A) approx. DSQRT(BIG)/DSQRT(SFMIN). */
+	xsc = sqrt(sfmin);
+    } else {
+	xsc = small;
+
+/*        Now, if the condition number of A is too big, */
+/*        sigma_max(A) / sigma_min(A) .GT. DSQRT(BIG/N) * EPSLN / SFMIN, */
+/*        as a precaution measure, the full SVD is computed using DGESVJ */
+/*        with accumulated Jacobi rotations. This provides numerically */
+/*        more robust computation, at the cost of slightly increased run */
+/*        time. Depending on the concrete implementation of BLAS and LAPACK */
+/*        (i.e. how they behave in presence of extreme ill-conditioning) the */
+/*        implementor may decide to remove this switch. */
+	if (aaqq < sqrt(sfmin) && lsvec && rsvec) {
+	    jracc = TRUE_;
+	}
+
+    }
+    if (aaqq < xsc) {
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    if (sva[p] < xsc) {
+		dlaset_("A", m, &c__1, &c_b34, &c_b34, &a[p * a_dim1 + 1], 
+			lda);
+		sva[p] = 0.;
+	    }
+/* L700: */
+	}
+    }
+
+/*     Preconditioning using QR factorization with pivoting */
+
+    if (rowpiv) {
+/*        Optional row permutation (Bjoerck row pivoting): */
+/*        A result by Cox and Higham shows that the Bjoerck's */
+/*        row pivoting combined with standard column pivoting */
+/*        has similar effect as Powell-Reid complete pivoting. */
+/*        The ell-infinity norms of A are made nonincreasing. */
+	i__1 = *m - 1;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *m - p + 1;
+	    q = idamax_(&i__2, &work[*m + *n + p], &c__1) + p - 1;
+	    iwork[(*n << 1) + p] = q;
+	    if (p != q) {
+		temp1 = work[*m + *n + p];
+		work[*m + *n + p] = work[*m + *n + q];
+		work[*m + *n + q] = temp1;
+	    }
+/* L1952: */
+	}
+	i__1 = *m - 1;
+	dlaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[(*n << 1) + 1], &
+		c__1);
+    }
+
+/*     End of the preparation phase (scaling, optional sorting and */
+/*     transposing, optional flushing of small columns). */
+
+/*     Preconditioning */
+
+/*     If the full SVD is needed, the right singular vectors are computed */
+/*     from a matrix equation, and for that we need theoretical analysis */
+/*     of the Businger-Golub pivoting. So we use DGEQP3 as the first RR QRF. */
+/*     In all other cases the first RR QRF can be chosen by other criteria */
+/*     (eg speed by replacing global with restricted window pivoting, such */
+/*     as in SGEQPX from TOMS # 782). Good results will be obtained using */
+/*     SGEQPX with properly (!) chosen numerical parameters. */
+/*     Any improvement of DGEQP3 improves overall performance of DGEJSV. */
+
+/*     A * P1 = Q1 * [ R1^t 0]^t: */
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	iwork[p] = 0;
+/* L1963: */
+    }
+    i__1 = *lwork - *n;
+    dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], &
+	    i__1, &ierr);
+
+/*     The upper triangular matrix R1 from the first QRF is inspected for */
+/*     rank deficiency and possibilities for deflation, or possible */
+/*     ill-conditioning. Depending on the user specified flag L2RANK, */
+/*     the procedure explores possibilities to reduce the numerical */
+/*     rank by inspecting the computed upper triangular factor. If */
+/*     L2RANK or L2ABER are up, then DGEJSV will compute the SVD of */
+/*     A + dA, where ||dA|| <= f(M,N)*EPSLN. */
+
+    nr = 1;
+    if (l2aber) {
+/*        Standard absolute error bound suffices. All sigma_i with */
+/*        sigma_i < N*EPSLN*||A|| are flushed to zero. This is an */
+/*        aggressive enforcement of lower numerical rank by introducing a */
+/*        backward error of the order of N*EPSLN*||A||. */
+	temp1 = sqrt((doublereal) (*n)) * epsln;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((d__2 = a[p + p * a_dim1], abs(d__2)) >= temp1 * (d__1 = a[
+		    a_dim1 + 1], abs(d__1))) {
+		++nr;
+	    } else {
+		goto L3002;
+	    }
+/* L3001: */
+	}
+L3002:
+	;
+    } else if (l2rank) {
+/*        Sudden drop on the diagonal of R1 is used as the criterion for */
+/*        close-to-rank-deficient. */
+	temp1 = sqrt(sfmin);
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((d__2 = a[p + p * a_dim1], abs(d__2)) < epsln * (d__1 = a[p - 
+		    1 + (p - 1) * a_dim1], abs(d__1)) || (d__3 = a[p + p * 
+		    a_dim1], abs(d__3)) < small || l2kill && (d__4 = a[p + p *
+		     a_dim1], abs(d__4)) < temp1) {
+		goto L3402;
+	    }
+	    ++nr;
+/* L3401: */
+	}
+L3402:
+
+	;
+    } else {
+/*        The goal is high relative accuracy. However, if the matrix */
+/*        has high scaled condition number the relative accuracy is in */
+/*        general not feasible. Later on, a condition number estimator */
+/*        will be deployed to estimate the scaled condition number. */
+/*        Here we just remove the underflowed part of the triangular */
+/*        factor. This prevents the situation in which the code is */
+/*        working hard to get the accuracy not warranted by the data. */
+	temp1 = sqrt(sfmin);
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((d__1 = a[p + p * a_dim1], abs(d__1)) < small || l2kill && (
+		    d__2 = a[p + p * a_dim1], abs(d__2)) < temp1) {
+		goto L3302;
+	    }
+	    ++nr;
+/* L3301: */
+	}
+L3302:
+
+	;
+    }
+
+    almort = FALSE_;
+    if (nr == *n) {
+	maxprj = 1.;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    temp1 = (d__1 = a[p + p * a_dim1], abs(d__1)) / sva[iwork[p]];
+	    maxprj = f2cmin(maxprj,temp1);
+/* L3051: */
+	}
+/* Computing 2nd power */
+	d__1 = maxprj;
+	if (d__1 * d__1 >= 1. - (doublereal) (*n) * epsln) {
+	    almort = TRUE_;
+	}
+    }
+
+
+    sconda = -1.;
+    condr1 = -1.;
+    condr2 = -1.;
+
+    if (errest) {
+	if (*n == nr) {
+	    if (rsvec) {
+		dlacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    temp1 = sva[iwork[p]];
+		    d__1 = 1. / temp1;
+		    dscal_(&p, &d__1, &v[p * v_dim1 + 1], &c__1);
+/* L3053: */
+		}
+		dpocon_("U", n, &v[v_offset], ldv, &c_b35, &temp1, &work[*n + 
+			1], &iwork[(*n << 1) + *m + 1], &ierr);
+	    } else if (lsvec) {
+		dlacpy_("U", n, n, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    temp1 = sva[iwork[p]];
+		    d__1 = 1. / temp1;
+		    dscal_(&p, &d__1, &u[p * u_dim1 + 1], &c__1);
+/* L3054: */
+		}
+		dpocon_("U", n, &u[u_offset], ldu, &c_b35, &temp1, &work[*n + 
+			1], &iwork[(*n << 1) + *m + 1], &ierr);
+	    } else {
+		dlacpy_("U", n, n, &a[a_offset], lda, &work[*n + 1], n);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    temp1 = sva[iwork[p]];
+		    d__1 = 1. / temp1;
+		    dscal_(&p, &d__1, &work[*n + (p - 1) * *n + 1], &c__1);
+/* L3052: */
+		}
+		dpocon_("U", n, &work[*n + 1], n, &c_b35, &temp1, &work[*n + *
+			n * *n + 1], &iwork[(*n << 1) + *m + 1], &ierr);
+	    }
+	    sconda = 1. / sqrt(temp1);
+/*           SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). */
+/*           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */
+	} else {
+	    sconda = -1.;
+	}
+    }
+
+    l2pert = l2pert && (d__1 = a[a_dim1 + 1] / a[nr + nr * a_dim1], abs(d__1))
+	     > sqrt(big1);
+/*     If there is no violent scaling, artificial perturbation is not needed. */
+
+/*     Phase 3: */
+
+    if (! (rsvec || lsvec)) {
+
+/*         Singular Values only */
+
+/* Computing MIN */
+	i__2 = *n - 1;
+	i__1 = f2cmin(i__2,nr);
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *n - p;
+	    dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * 
+		    a_dim1], &c__1);
+/* L1946: */
+	}
+
+/*        The following two DO-loops introduce small relative perturbation */
+/*        into the strict upper triangle of the lower triangular matrix. */
+/*        Small entries below the main diagonal are also changed. */
+/*        This modification is useful if the computing environment does not */
+/*        provide/allow FLUSH TO ZERO underflow, for it prevents many */
+/*        annoying denormalized numbers in case of strongly scaled matrices. */
+/*        The perturbation is structured so that it does not introduce any */
+/*        new perturbation of the singular values, and it does not destroy */
+/*        the job done by the preconditioner. */
+/*        The licence for this perturbation is in the variable L2PERT, which */
+/*        should be .FALSE. if FLUSH TO ZERO underflow is active. */
+
+	if (! almort) {
+
+	    if (l2pert) {
+/*              XSC = DSQRT(SMALL) */
+		xsc = epsln / (doublereal) (*n);
+		i__1 = nr;
+		for (q = 1; q <= i__1; ++q) {
+		    temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1));
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= 
+				temp1 || p < q) {
+			    a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * 
+				    a_dim1]);
+			}
+/* L4949: */
+		    }
+/* L4947: */
+		}
+	    } else {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 
+			1], lda);
+	    }
+
+
+	    i__1 = *lwork - *n;
+	    dgeqrf_(n, &nr, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1,
+		     &ierr);
+
+	    i__1 = nr - 1;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr - p;
+		dcopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * 
+			a_dim1], &c__1);
+/* L1948: */
+	    }
+
+	}
+
+/*           Row-cyclic Jacobi SVD algorithm with column pivoting */
+
+/*           Again, some perturbation ("background noise") is added to drown denormals */
+	if (l2pert) {
+/*              XSC = DSQRT(SMALL) */
+	    xsc = epsln / (doublereal) (*n);
+	    i__1 = nr;
+	    for (q = 1; q <= i__1; ++q) {
+		temp1 = xsc * (d__1 = a[q + q * a_dim1], abs(d__1));
+		i__2 = nr;
+		for (p = 1; p <= i__2; ++p) {
+		    if (p > q && (d__1 = a[p + q * a_dim1], abs(d__1)) <= 
+			    temp1 || p < q) {
+			a[p + q * a_dim1] = d_sign(&temp1, &a[p + q * a_dim1])
+				;
+		    }
+/* L1949: */
+		}
+/* L1947: */
+	    }
+	} else {
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 1], 
+		    lda);
+	}
+
+/*           One-sided Jacobi rotation kernel on the lower triangular matrix (plus */
+/*           perturbation which is ignored in the part which destroys triangular form) */
+
+	dgesvj_("L", "NoU", "NoV", &nr, &nr, &a[a_offset], lda, &sva[1], n, &
+		v[v_offset], ldv, &work[1], lwork, info);
+
+	scalem = work[1];
+	numrank = i_dnnt(&work[2]);
+
+
+    } else if (rsvec && ! lsvec) {
+
+/*        -> Singular Values and Right Singular Vectors <- */
+
+	if (almort) {
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n - p + 1;
+		dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], &
+			c__1);
+/* L1998: */
+	    }
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+		    1], ldv);
+
+	    dgesvj_("L", "U", "N", n, &nr, &v[v_offset], ldv, &sva[1], &nr, &
+		    a[a_offset], lda, &work[1], lwork, info);
+	    scalem = work[1];
+	    numrank = i_dnnt(&work[2]);
+	} else {
+
+/*        Two more QR factorizations (one QRF is not enough, two require accumulated product of Jacobi rotations, three are perfect) */
+
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &a[a_dim1 + 2], 
+		    lda);
+	    i__1 = *lwork - *n;
+	    dgelqf_(&nr, n, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1,
+		     &ierr);
+	    dlacpy_("Lower", &nr, &nr, &a[a_offset], lda, &v[v_offset], ldv);
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+		    1], ldv);
+	    i__1 = *lwork - (*n << 1);
+	    dgeqrf_(&nr, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 
+		    1) + 1], &i__1, &ierr);
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr - p + 1;
+		dcopy_(&i__2, &v[p + p * v_dim1], ldv, &v[p + p * v_dim1], &
+			c__1);
+/* L8998: */
+	    }
+	    i__1 = nr - 1;
+	    i__2 = nr - 1;
+	    dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+		    1], ldv);
+
+	    dgesvj_("Lower", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[1], &
+		    nr, &u[u_offset], ldu, &work[*n + 1], lwork, info);
+	    scalem = work[*n + 1];
+	    numrank = i_dnnt(&work[*n + 2]);
+	    if (nr < *n) {
+		i__1 = *n - nr;
+		dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], 
+			ldv);
+		i__1 = *n - nr;
+		dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 
+			+ 1], ldv);
+		i__1 = *n - nr;
+		i__2 = *n - nr;
+		dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 
+			1) * v_dim1], ldv);
+	    }
+
+	    i__1 = *lwork - *n;
+	    dormlq_("Left", "Transpose", n, n, &nr, &a[a_offset], lda, &work[
+		    1], &v[v_offset], ldv, &work[*n + 1], &i__1, &ierr);
+
+	}
+
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    dcopy_(n, &v[p + v_dim1], ldv, &a[iwork[p] + a_dim1], lda);
+/* L8991: */
+	}
+	dlacpy_("All", n, n, &a[a_offset], lda, &v[v_offset], ldv);
+
+	if (transp) {
+	    dlacpy_("All", n, n, &v[v_offset], ldv, &u[u_offset], ldu);
+	}
+
+    } else if (lsvec && ! rsvec) {
+
+
+/*        Second preconditioning step, to avoid the need to accumulate Jacobi rotations in the Jacobi iterations. */
+	i__1 = nr;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *n - p + 1;
+	    dcopy_(&i__2, &a[p + p * a_dim1], lda, &u[p + p * u_dim1], &c__1);
+/* L1965: */
+	}
+	i__1 = nr - 1;
+	i__2 = nr - 1;
+	dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], 
+		ldu);
+
+	i__1 = *lwork - (*n << 1);
+	dgeqrf_(n, &nr, &u[u_offset], ldu, &work[*n + 1], &work[(*n << 1) + 1]
+		, &i__1, &ierr);
+
+	i__1 = nr - 1;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = nr - p;
+	    dcopy_(&i__2, &u[p + (p + 1) * u_dim1], ldu, &u[p + 1 + p * 
+		    u_dim1], &c__1);
+/* L1967: */
+	}
+	i__1 = nr - 1;
+	i__2 = nr - 1;
+	dlaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], 
+		ldu);
+
+	i__1 = *lwork - *n;
+	dgesvj_("Lower", "U", "N", &nr, &nr, &u[u_offset], ldu, &sva[1], &nr, 
+		&a[a_offset], lda, &work[*n + 1], &i__1, info);
+	scalem = work[*n + 1];
+	numrank = i_dnnt(&work[*n + 2]);
+
+	if (nr < *m) {
+	    i__1 = *m - nr;
+	    dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu);
+	    if (nr < n1) {
+		i__1 = n1 - nr;
+		dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * u_dim1 
+			+ 1], ldu);
+		i__1 = *m - nr;
+		i__2 = n1 - nr;
+		dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (nr + 
+			1) * u_dim1], ldu);
+	    }
+	}
+
+	i__1 = *lwork - *n;
+	dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &u[
+		u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+
+	if (rowpiv) {
+	    i__1 = *m - 1;
+	    dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + 
+		    1], &c_n1);
+	}
+
+	i__1 = n1;
+	for (p = 1; p <= i__1; ++p) {
+	    xsc = 1. / dnrm2_(m, &u[p * u_dim1 + 1], &c__1);
+	    dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1);
+/* L1974: */
+	}
+
+	if (transp) {
+	    dlacpy_("All", n, n, &u[u_offset], ldu, &v[v_offset], ldv);
+	}
+
+    } else {
+
+
+	if (! jracc) {
+
+	    if (! almort) {
+
+/*           Second Preconditioning Step (QRF [with pivoting]) */
+/*           Note that the composition of TRANSPOSE, QRF and TRANSPOSE is */
+/*           equivalent to an LQF CALL. Since in many libraries the QRF */
+/*           seems to be better optimized than the LQF, we do explicit */
+/*           transpose and use the QRF. This is subject to changes in an */
+/*           optimized implementation of DGEJSV. */
+
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = *n - p + 1;
+		    dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1],
+			     &c__1);
+/* L1968: */
+		}
+
+/*           The two loops below perturb small entries to avoid denormals in the */
+/*           second QR factorization, where they are as good as zeros. This is done */
+/*           to avoid painfully slow computation with denormals. The relative size */
+/*           of the perturbation is a parameter that can be changed by the implementer. */
+/*           This perturbation device will be obsolete on machines with */
+/*           properly implemented arithmetic. */
+/*           To switch it off, set L2PERT=.FALSE. To remove it from  the */
+/*           code, remove the action under L2PERT=.TRUE., leave the ELSE part. */
+/*           The following two loops should be blocked and fused with the */
+/*           transposed copy above. */
+
+		if (l2pert) {
+		    xsc = sqrt(small);
+		    i__1 = nr;
+		    for (q = 1; q <= i__1; ++q) {
+			temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1));
+			i__2 = *n;
+			for (p = 1; p <= i__2; ++p) {
+			    if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1))
+				     <= temp1 || p < q) {
+				v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * 
+					v_dim1]);
+			    }
+			    if (p < q) {
+				v[p + q * v_dim1] = -v[p + q * v_dim1];
+			    }
+/* L2968: */
+			}
+/* L2969: */
+		    }
+		} else {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		}
+
+/*           Estimate the row scaled condition number of R1 */
+/*           (If R1 is rectangular, N > NR, then the condition number */
+/*           of the leading NR x NR submatrix is estimated.) */
+
+		dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1]
+			, &nr);
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr - p + 1;
+		    temp1 = dnrm2_(&i__2, &work[(*n << 1) + (p - 1) * nr + p],
+			     &c__1);
+		    i__2 = nr - p + 1;
+		    d__1 = 1. / temp1;
+		    dscal_(&i__2, &d__1, &work[(*n << 1) + (p - 1) * nr + p], 
+			    &c__1);
+/* L3950: */
+		}
+		dpocon_("Lower", &nr, &work[(*n << 1) + 1], &nr, &c_b35, &
+			temp1, &work[(*n << 1) + nr * nr + 1], &iwork[*m + (*
+			n << 1) + 1], &ierr);
+		condr1 = 1. / sqrt(temp1);
+/*           R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) */
+/*           more conservative    <=> CONDR1 .LT. DSQRT(DBLE(N)) */
+
+		cond_ok__ = sqrt((doublereal) nr);
+/* [TP]       COND_OK is a tuning parameter. */
+		if (condr1 < cond_ok__) {
+/*              Second QRF, without pivoting. In an optimized implementation, */
+/*              this QRF should be implemented as the QRF of a lower triangular matrix. */
+/*              R1^t = Q2 * R2 */
+		    i__1 = *lwork - (*n << 1);
+		    dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*
+			    n << 1) + 1], &i__1, &ierr);
+
+		    if (l2pert) {
+			xsc = sqrt(small) / epsln;
+			i__1 = nr;
+			for (p = 2; p <= i__1; ++p) {
+			    i__2 = p - 1;
+			    for (q = 1; q <= i__2; ++q) {
+/* Computing MIN */
+				d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), 
+					d__4 = (d__2 = v[q + q * v_dim1], abs(
+					d__2));
+				temp1 = xsc * f2cmin(d__3,d__4);
+				if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= 
+					temp1) {
+				    v[q + p * v_dim1] = d_sign(&temp1, &v[q + 
+					    p * v_dim1]);
+				}
+/* L3958: */
+			    }
+/* L3959: */
+			}
+		    }
+
+		    if (nr != *n) {
+			dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 
+				1) + 1], n);
+		    }
+
+		    i__1 = nr - 1;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = nr - p;
+			dcopy_(&i__2, &v[p + (p + 1) * v_dim1], ldv, &v[p + 1 
+				+ p * v_dim1], &c__1);
+/* L1969: */
+		    }
+
+		    condr2 = condr1;
+
+		} else {
+
+/*              Note that windowed pivoting would be equally good */
+/*              numerically, and more run-time efficient. So, in */
+/*              an optimal implementation, the next call to DGEQP3 */
+/*              should be replaced with eg. CALL SGEQPX (ACM TOMS #782) */
+/*              with properly (carefully) chosen parameters. */
+
+/*              R1^t * P2 = Q2 * R2 */
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			iwork[*n + p] = 0;
+/* L3003: */
+		    }
+		    i__1 = *lwork - (*n << 1);
+		    dgeqp3_(n, &nr, &v[v_offset], ldv, &iwork[*n + 1], &work[*
+			    n + 1], &work[(*n << 1) + 1], &i__1, &ierr);
+/* *               CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), */
+/* *     $              LWORK-2*N, IERR ) */
+		    if (l2pert) {
+			xsc = sqrt(small);
+			i__1 = nr;
+			for (p = 2; p <= i__1; ++p) {
+			    i__2 = p - 1;
+			    for (q = 1; q <= i__2; ++q) {
+/* Computing MIN */
+				d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), 
+					d__4 = (d__2 = v[q + q * v_dim1], abs(
+					d__2));
+				temp1 = xsc * f2cmin(d__3,d__4);
+				if ((d__1 = v[q + p * v_dim1], abs(d__1)) <= 
+					temp1) {
+				    v[q + p * v_dim1] = d_sign(&temp1, &v[q + 
+					    p * v_dim1]);
+				}
+/* L3968: */
+			    }
+/* L3969: */
+			}
+		    }
+
+		    dlacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 
+			    1], n);
+
+		    if (l2pert) {
+			xsc = sqrt(small);
+			i__1 = nr;
+			for (p = 2; p <= i__1; ++p) {
+			    i__2 = p - 1;
+			    for (q = 1; q <= i__2; ++q) {
+/* Computing MIN */
+				d__3 = (d__1 = v[p + p * v_dim1], abs(d__1)), 
+					d__4 = (d__2 = v[q + q * v_dim1], abs(
+					d__2));
+				temp1 = xsc * f2cmin(d__3,d__4);
+				v[p + q * v_dim1] = -d_sign(&temp1, &v[q + p *
+					 v_dim1]);
+/* L8971: */
+			    }
+/* L8970: */
+			}
+		    } else {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			dlaset_("L", &i__1, &i__2, &c_b34, &c_b34, &v[v_dim1 
+				+ 2], ldv);
+		    }
+/*              Now, compute R2 = L3 * Q3, the LQ factorization. */
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    dgelqf_(&nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + *n 
+			    * nr + 1], &work[(*n << 1) + *n * nr + nr + 1], &
+			    i__1, &ierr);
+		    dlacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) 
+			    + *n * nr + nr + 1], &nr);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			temp1 = dnrm2_(&p, &work[(*n << 1) + *n * nr + nr + p]
+				, &nr);
+			d__1 = 1. / temp1;
+			dscal_(&p, &d__1, &work[(*n << 1) + *n * nr + nr + p],
+				 &nr);
+/* L4950: */
+		    }
+		    dpocon_("L", &nr, &work[(*n << 1) + *n * nr + nr + 1], &
+			    nr, &c_b35, &temp1, &work[(*n << 1) + *n * nr + 
+			    nr + nr * nr + 1], &iwork[*m + (*n << 1) + 1], &
+			    ierr);
+		    condr2 = 1. / sqrt(temp1);
+
+		    if (condr2 >= cond_ok__) {
+/*                 (this overwrites the copy of R2, as it will not be */
+/*                 needed in this branch, but it does not overwrite the */
+/*                 Householder vectors of Q2.). */
+			dlacpy_("U", &nr, &nr, &v[v_offset], ldv, &work[(*n <<
+				 1) + 1], n);
+/*                 WORK(2*N+N*NR+1:2*N+N*NR+N) */
+		    }
+
+		}
+
+		if (l2pert) {
+		    xsc = sqrt(small);
+		    i__1 = nr;
+		    for (q = 2; q <= i__1; ++q) {
+			temp1 = xsc * v[q + q * v_dim1];
+			i__2 = q - 1;
+			for (p = 1; p <= i__2; ++p) {
+/*                    V(p,q) = - DSIGN( TEMP1, V(q,p) ) */
+			    v[p + q * v_dim1] = -d_sign(&temp1, &v[p + q * 
+				    v_dim1]);
+/* L4969: */
+			}
+/* L4968: */
+		    }
+		} else {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		}
+
+/*        Second preconditioning finished; continue with Jacobi SVD */
+/*        The input matrix is lower triangular. */
+
+/*        Recover the right singular vectors as solution of a well */
+/*        conditioned triangular matrix equation. */
+
+		if (condr1 < cond_ok__) {
+
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[
+			    1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n *
+			     nr + nr + 1], &i__1, info);
+		    scalem = work[(*n << 1) + *n * nr + nr + 1];
+		    numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 
+				+ 1], &c__1);
+			dscal_(&nr, &sva[p], &v[p * v_dim1 + 1], &c__1);
+/* L3970: */
+		    }
+
+		    if (nr == *n) {
+/* :))             .. best case, R1 is inverted. The solution of this matrix */
+/*                 equation is Q2*V2 = the product of the Jacobi rotations */
+/*                 used in DGESVJ, premultiplied with the orthogonal matrix */
+/*                 from the second QR factorization. */
+			dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &a[
+				a_offset], lda, &v[v_offset], ldv);
+		    } else {
+/*                 R1 is well conditioned but not square: Transpose(R2) is inverted */
+/*                 to get the product of the Jacobi rotations used in DGESVJ. The */
+/*                 Q-factor from the second QR factorization is then built in explicitly. */
+			dtrsm_("L", "U", "T", "N", &nr, &nr, &c_b35, &work[(*
+				n << 1) + 1], n, &v[v_offset], ldv);
+			if (nr < *n) {
+			    i__1 = *n - nr;
+			    dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 
+				    1 + v_dim1], ldv);
+			    i__1 = *n - nr;
+			    dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 
+				    1) * v_dim1 + 1], ldv);
+			    i__1 = *n - nr;
+			    i__2 = *n - nr;
+			    dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr 
+				    + 1 + (nr + 1) * v_dim1], ldv);
+			}
+			i__1 = *lwork - (*n << 1) - *n * nr - nr;
+			dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, 
+				&work[*n + 1], &v[v_offset], ldv, &work[(*n <<
+				 1) + *n * nr + nr + 1], &i__1, &ierr);
+		    }
+
+		} else if (condr2 < cond_ok__) {
+
+/* :)           .. the input matrix A is very likely a relative of */
+/*              the Kahan matrix :) */
+/*              The matrix R2 is inverted. The solution of the matrix equation */
+/*              is Q3^T*V3 = the product of the Jacobi rotations (applied to */
+/*              the lower triangular L3 from the LQ factorization of */
+/*              R2=L3*Q3), pre-multiplied with the transposed Q3. */
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    dgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[
+			    1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n *
+			     nr + nr + 1], &i__1, info);
+		    scalem = work[(*n << 1) + *n * nr + nr + 1];
+		    numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			dcopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 
+				+ 1], &c__1);
+			dscal_(&nr, &sva[p], &u[p * u_dim1 + 1], &c__1);
+/* L3870: */
+		    }
+		    dtrsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &work[(*n << 
+			    1) + 1], n, &u[u_offset], ldu);
+		    i__1 = nr;
+		    for (q = 1; q <= i__1; ++q) {
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = 
+				    u[p + q * u_dim1];
+/* L872: */
+			}
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr 
+				    + p];
+/* L874: */
+			}
+/* L873: */
+		    }
+		    if (nr < *n) {
+			i__1 = *n - nr;
+			dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + 
+				v_dim1], ldv);
+			i__1 = *n - nr;
+			dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) *
+				 v_dim1 + 1], ldv);
+			i__1 = *n - nr;
+			i__2 = *n - nr;
+			dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 
+				+ (nr + 1) * v_dim1], ldv);
+		    }
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &
+			    work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) 
+			    + *n * nr + nr + 1], &i__1, &ierr);
+		} else {
+/*              Last line of defense. */
+/* #:(          This is a rather pathological case: no scaled condition */
+/*              improvement after two pivoted QR factorizations. Other */
+/*              possibility is that the rank revealing QR factorization */
+/*              or the condition estimator has failed, or the COND_OK */
+/*              is set very close to ONE (which is unnecessary). Normally, */
+/*              this branch should never be executed, but in rare cases of */
+/*              failure of the RRQR or condition estimator, the last line of */
+/*              defense ensures that DGEJSV completes the task. */
+/*              Compute the full SVD of L3 using DGESVJ with explicit */
+/*              accumulation of Jacobi rotations. */
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    dgesvj_("L", "U", "V", &nr, &nr, &v[v_offset], ldv, &sva[
+			    1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n *
+			     nr + nr + 1], &i__1, info);
+		    scalem = work[(*n << 1) + *n * nr + nr + 1];
+		    numrank = i_dnnt(&work[(*n << 1) + *n * nr + nr + 2]);
+		    if (nr < *n) {
+			i__1 = *n - nr;
+			dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + 
+				v_dim1], ldv);
+			i__1 = *n - nr;
+			dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) *
+				 v_dim1 + 1], ldv);
+			i__1 = *n - nr;
+			i__2 = *n - nr;
+			dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 
+				+ (nr + 1) * v_dim1], ldv);
+		    }
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &
+			    work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) 
+			    + *n * nr + nr + 1], &i__1, &ierr);
+
+		    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+		    dormlq_("L", "T", &nr, &nr, &nr, &work[(*n << 1) + 1], n, 
+			    &work[(*n << 1) + *n * nr + 1], &u[u_offset], ldu,
+			     &work[(*n << 1) + *n * nr + nr + 1], &i__1, &
+			    ierr);
+		    i__1 = nr;
+		    for (q = 1; q <= i__1; ++q) {
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = 
+				    u[p + q * u_dim1];
+/* L772: */
+			}
+			i__2 = nr;
+			for (p = 1; p <= i__2; ++p) {
+			    u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr 
+				    + p];
+/* L774: */
+			}
+/* L773: */
+		    }
+
+		}
+
+/*           Permute the rows of V using the (column) permutation from the */
+/*           first QRF. Also, scale the columns to make them unit in */
+/*           Euclidean norm. This applies to all cases. */
+
+		temp1 = sqrt((doublereal) (*n)) * epsln;
+		i__1 = *n;
+		for (q = 1; q <= i__1; ++q) {
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * 
+				v_dim1];
+/* L972: */
+		    }
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p]
+				;
+/* L973: */
+		    }
+		    xsc = 1. / dnrm2_(n, &v[q * v_dim1 + 1], &c__1);
+		    if (xsc < 1. - temp1 || xsc > temp1 + 1.) {
+			dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1);
+		    }
+/* L1972: */
+		}
+/*           At this moment, V contains the right singular vectors of A. */
+/*           Next, assemble the left singular vector matrix U (M x N). */
+		if (nr < *m) {
+		    i__1 = *m - nr;
+		    dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr < n1) {
+			i__1 = n1 - nr;
+			dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) *
+				 u_dim1 + 1], ldu);
+			i__1 = *m - nr;
+			i__2 = n1 - nr;
+			dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 
+				+ (nr + 1) * u_dim1], ldu);
+		    }
+		}
+
+/*           The Q matrix from the first QRF is built into the left singular */
+/*           matrix U. This applies to all cases. */
+
+		i__1 = *lwork - *n;
+		dormqr_("Left", "No_Tr", m, &n1, n, &a[a_offset], lda, &work[
+			1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+/*           The columns of U are normalized. The cost is O(M*N) flops. */
+		temp1 = sqrt((doublereal) (*m)) * epsln;
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    xsc = 1. / dnrm2_(m, &u[p * u_dim1 + 1], &c__1);
+		    if (xsc < 1. - temp1 || xsc > temp1 + 1.) {
+			dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1);
+		    }
+/* L1973: */
+		}
+
+/*           If the initial QRF is computed with row pivoting, the left */
+/*           singular vectors must be adjusted. */
+
+		if (rowpiv) {
+		    i__1 = *m - 1;
+		    dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n 
+			    << 1) + 1], &c_n1);
+		}
+
+	    } else {
+
+/*        the second QRF is not needed */
+
+		dlacpy_("Upper", n, n, &a[a_offset], lda, &work[*n + 1], n);
+		if (l2pert) {
+		    xsc = sqrt(small);
+		    i__1 = *n;
+		    for (p = 2; p <= i__1; ++p) {
+			temp1 = xsc * work[*n + (p - 1) * *n + p];
+			i__2 = p - 1;
+			for (q = 1; q <= i__2; ++q) {
+			    work[*n + (q - 1) * *n + p] = -d_sign(&temp1, &
+				    work[*n + (p - 1) * *n + q]);
+/* L5971: */
+			}
+/* L5970: */
+		    }
+		} else {
+		    i__1 = *n - 1;
+		    i__2 = *n - 1;
+		    dlaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &work[*n + 
+			    2], n);
+		}
+
+		i__1 = *lwork - *n - *n * *n;
+		dgesvj_("Upper", "U", "N", n, n, &work[*n + 1], n, &sva[1], n,
+			 &u[u_offset], ldu, &work[*n + *n * *n + 1], &i__1, 
+			info);
+
+		scalem = work[*n + *n * *n + 1];
+		numrank = i_dnnt(&work[*n + *n * *n + 2]);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    dcopy_(n, &work[*n + (p - 1) * *n + 1], &c__1, &u[p * 
+			    u_dim1 + 1], &c__1);
+		    dscal_(n, &sva[p], &work[*n + (p - 1) * *n + 1], &c__1);
+/* L6970: */
+		}
+
+		dtrsm_("Left", "Upper", "NoTrans", "No UD", n, n, &c_b35, &a[
+			a_offset], lda, &work[*n + 1], n);
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    dcopy_(n, &work[*n + p], n, &v[iwork[p] + v_dim1], ldv);
+/* L6972: */
+		}
+		temp1 = sqrt((doublereal) (*n)) * epsln;
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    xsc = 1. / dnrm2_(n, &v[p * v_dim1 + 1], &c__1);
+		    if (xsc < 1. - temp1 || xsc > temp1 + 1.) {
+			dscal_(n, &xsc, &v[p * v_dim1 + 1], &c__1);
+		    }
+/* L6971: */
+		}
+
+/*           Assemble the left singular vector matrix U (M x N). */
+
+		if (*n < *m) {
+		    i__1 = *m - *n;
+		    dlaset_("A", &i__1, n, &c_b34, &c_b34, &u[*n + 1 + u_dim1]
+			    , ldu);
+		    if (*n < n1) {
+			i__1 = n1 - *n;
+			dlaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * 
+				u_dim1 + 1], ldu);
+			i__1 = *m - *n;
+			i__2 = n1 - *n;
+			dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[*n + 1 
+				+ (*n + 1) * u_dim1], ldu);
+		    }
+		}
+		i__1 = *lwork - *n;
+		dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[
+			1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+		temp1 = sqrt((doublereal) (*m)) * epsln;
+		i__1 = n1;
+		for (p = 1; p <= i__1; ++p) {
+		    xsc = 1. / dnrm2_(m, &u[p * u_dim1 + 1], &c__1);
+		    if (xsc < 1. - temp1 || xsc > temp1 + 1.) {
+			dscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1);
+		    }
+/* L6973: */
+		}
+
+		if (rowpiv) {
+		    i__1 = *m - 1;
+		    dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n 
+			    << 1) + 1], &c_n1);
+		}
+
+	    }
+
+/*        end of the  >> almost orthogonal case <<  in the full SVD */
+
+	} else {
+
+/*        This branch deploys a preconditioned Jacobi SVD with explicitly */
+/*        accumulated rotations. It is included as optional, mainly for */
+/*        experimental purposes. It does perform well, and can also be used. */
+/*        In this implementation, this branch will be automatically activated */
+/*        if the  condition number sigma_max(A) / sigma_min(A) is predicted */
+/*        to be greater than the overflow threshold. This is because the */
+/*        a posteriori computation of the singular vectors assumes robust */
+/*        implementation of BLAS and some LAPACK procedures, capable of working */
+/*        in presence of extreme values. Since that is not always the case, ... */
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n - p + 1;
+		dcopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], &
+			c__1);
+/* L7968: */
+	    }
+
+	    if (l2pert) {
+		xsc = sqrt(small / epsln);
+		i__1 = nr;
+		for (q = 1; q <= i__1; ++q) {
+		    temp1 = xsc * (d__1 = v[q + q * v_dim1], abs(d__1));
+		    i__2 = *n;
+		    for (p = 1; p <= i__2; ++p) {
+			if (p > q && (d__1 = v[p + q * v_dim1], abs(d__1)) <= 
+				temp1 || p < q) {
+			    v[p + q * v_dim1] = d_sign(&temp1, &v[p + q * 
+				    v_dim1]);
+			}
+			if (p < q) {
+			    v[p + q * v_dim1] = -v[p + q * v_dim1];
+			}
+/* L5968: */
+		    }
+/* L5969: */
+		}
+	    } else {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + 
+			1], ldv);
+	    }
+	    i__1 = *lwork - (*n << 1);
+	    dgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 1) 
+		    + 1], &i__1, &ierr);
+	    dlacpy_("L", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n);
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr - p + 1;
+		dcopy_(&i__2, &v[p + p * v_dim1], ldv, &u[p + p * u_dim1], &
+			c__1);
+/* L7969: */
+	    }
+	    if (l2pert) {
+		xsc = sqrt(small / epsln);
+		i__1 = nr;
+		for (q = 2; q <= i__1; ++q) {
+		    i__2 = q - 1;
+		    for (p = 1; p <= i__2; ++p) {
+/* Computing MIN */
+			d__3 = (d__1 = u[p + p * u_dim1], abs(d__1)), d__4 = (
+				d__2 = u[q + q * u_dim1], abs(d__2));
+			temp1 = xsc * f2cmin(d__3,d__4);
+			u[p + q * u_dim1] = -d_sign(&temp1, &u[q + p * u_dim1]
+				);
+/* L9971: */
+		    }
+/* L9970: */
+		}
+	    } else {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("U", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 
+			1], ldu);
+	    }
+	    i__1 = *lwork - (*n << 1) - *n * nr;
+	    dgesvj_("G", "U", "V", &nr, &nr, &u[u_offset], ldu, &sva[1], n, &
+		    v[v_offset], ldv, &work[(*n << 1) + *n * nr + 1], &i__1, 
+		    info);
+	    scalem = work[(*n << 1) + *n * nr + 1];
+	    numrank = i_dnnt(&work[(*n << 1) + *n * nr + 2]);
+	    if (nr < *n) {
+		i__1 = *n - nr;
+		dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], 
+			ldv);
+		i__1 = *n - nr;
+		dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 
+			+ 1], ldv);
+		i__1 = *n - nr;
+		i__2 = *n - nr;
+		dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + 
+			1) * v_dim1], ldv);
+	    }
+	    i__1 = *lwork - (*n << 1) - *n * nr - nr;
+	    dormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &work[*n + 
+		    1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1]
+		    , &i__1, &ierr);
+
+/*           Permute the rows of V using the (column) permutation from the */
+/*           first QRF. Also, scale the columns to make them unit in */
+/*           Euclidean norm. This applies to all cases. */
+
+	    temp1 = sqrt((doublereal) (*n)) * epsln;
+	    i__1 = *n;
+	    for (q = 1; q <= i__1; ++q) {
+		i__2 = *n;
+		for (p = 1; p <= i__2; ++p) {
+		    work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * 
+			    v_dim1];
+/* L8972: */
+		}
+		i__2 = *n;
+		for (p = 1; p <= i__2; ++p) {
+		    v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p];
+/* L8973: */
+		}
+		xsc = 1. / dnrm2_(n, &v[q * v_dim1 + 1], &c__1);
+		if (xsc < 1. - temp1 || xsc > temp1 + 1.) {
+		    dscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1);
+		}
+/* L7972: */
+	    }
+
+/*           At this moment, V contains the right singular vectors of A. */
+/*           Next, assemble the left singular vector matrix U (M x N). */
+
+	    if (nr < *m) {
+		i__1 = *m - nr;
+		dlaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], 
+			ldu);
+		if (nr < n1) {
+		    i__1 = n1 - nr;
+		    dlaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * 
+			    u_dim1 + 1], ldu);
+		    i__1 = *m - nr;
+		    i__2 = n1 - nr;
+		    dlaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (
+			    nr + 1) * u_dim1], ldu);
+		}
+	    }
+
+	    i__1 = *lwork - *n;
+	    dormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &
+		    u[u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+
+	    if (rowpiv) {
+		i__1 = *m - 1;
+		dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1)
+			 + 1], &c_n1);
+	    }
+
+
+	}
+	if (transp) {
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		dswap_(n, &u[p * u_dim1 + 1], &c__1, &v[p * v_dim1 + 1], &
+			c__1);
+/* L6974: */
+	    }
+	}
+
+    }
+/*     end of the full SVD */
+
+/*     Undo scaling, if necessary (and possible) */
+
+    if (uscal2 <= big / sva[1] * uscal1) {
+	dlascl_("G", &c__0, &c__0, &uscal1, &uscal2, &nr, &c__1, &sva[1], n, &
+		ierr);
+	uscal1 = 1.;
+	uscal2 = 1.;
+    }
+
+    if (nr < *n) {
+	i__1 = *n;
+	for (p = nr + 1; p <= i__1; ++p) {
+	    sva[p] = 0.;
+/* L3004: */
+	}
+    }
+
+    work[1] = uscal2 * scalem;
+    work[2] = uscal1;
+    if (errest) {
+	work[3] = sconda;
+    }
+    if (lsvec && rsvec) {
+	work[4] = condr1;
+	work[5] = condr2;
+    }
+    if (l2tran) {
+	work[6] = entra;
+	work[7] = entrat;
+    }
+
+    iwork[1] = nr;
+    iwork[2] = numrank;
+    iwork[3] = warning;
+
+    return 0;
+} /* dgejsv_ */
+
diff --git a/lapack-netlib/SRC/dgelq.c b/lapack-netlib/SRC/dgelq.c
new file mode 100644
index 000000000..9d2e7860d
--- /dev/null
+++ b/lapack-netlib/SRC/dgelq.c
@@ -0,0 +1,745 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Expression-valued helpers: parenthesized (not braced) so they can be used inside expressions. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* Returns x**n by repeated squaring; for n < 0 the result is (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float pow = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u > 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns x**n by repeated squaring; for n < 0 the result is (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double pow = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u > 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns the complex power x**n by repeated squaring; for n < 0 the result is (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float pow = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u > 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns the complex power x**n by repeated squaring; for n < 0 the result is (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex double pow = 1.0;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u > 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Integer power x**n. For n < 0: 1 if x == 1, (-1)**|n| if x == -1, otherwise 0 (x == 0 evaluates 1/x). */
+static integer pow_ii(integer x, integer n) {
+	integer pow = 1; unsigned long int u = (unsigned long int)n;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0) {
+		/* NOTE(review): 0**negative still divides by zero, as before; presumably a deliberate trap - confirm. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* x == -1: negate in unsigned arithmetic, since -n overflows for the most negative n. */
+		u = 0UL - (unsigned long int)n;
+	}
+	/* Binary exponentiation over the bits of the (now non-negative) exponent. */
+	for (; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u > 1) x *= x;
+	}
+	return pow;
+}
+/* Position (1-based, counted from element s) of the first largest entry among w[s-1..e-1]; n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Position (1-based, counted from element s) of the first largest entry among w[s-1..e-1]; n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Stores in *z the sum of conj(x[k]) * y[k] over n elements taken with strides incx and incy. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+	} else {
+		/* A negative stride starts at the far end so every index stays inside the vector (reference BLAS rule). */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy)
+			zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+/* Stores in *z the sum of conj(x[k]) * y[k] over n elements taken with strides incx and incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+	} else {
+		/* A negative stride starts at the far end so every index stays inside the vector (reference BLAS rule). */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy)
+			zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+/* Stores in *z the unconjugated sum of x[k] * y[k] over n elements taken with strides incx and incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += Cf(&x[i]) * Cf(&y[i]);
+	} else {
+		/* A negative stride starts at the far end so every index stays inside the vector (reference BLAS rule). */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy)
+			zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+/* Stores in *z the unconjugated sum of x[k] * y[k] over n elements taken with strides incx and incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += Cd(&x[i]) * Cd(&y[i]);
+	} else {
+		/* A negative stride starts at the far end so every index stays inside the vector (reference BLAS rule). */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy)
+			zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+
+/* > \brief \b DGELQ */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
+/*                         INFO ) */
+
+/*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
+/*       DOUBLE PRECISION  A( LDA, * ), T( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELQ computes an LQ factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = ( L 0 ) *  Q */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a N-by-N orthogonal matrix; */
+/* >    L is a lower-triangular M-by-M matrix; */
+/* >    0 is a M-by-(N-M) zero matrix, if M < N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the M-by-f2cmin(M,N) lower trapezoidal matrix L */
+/* >          (L is lower triangular if M <= N); */
+/* >          the elements above the diagonal are used to store part of the */
+/* >          data structure to represent Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) */
+/* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
+/* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
+/* >          Remaining T contains part of the data structure used to represent Q. */
+/* >          If one wants to apply or construct Q, then one needs to keep T */
+/* >          (in addition to A) and pass it to further subroutines. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          If TSIZE >= 5, the dimension of the array T. */
+/* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If TSIZE = -1, the routine calculates optimal size of T for the */
+/* >          optimum performance and returns this value in T(1). */
+/* >          If TSIZE = -2, the routine calculates minimal size of T and */
+/* >          returns this value in T(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
+/* >          or optimal, if query was assumed) LWORK. */
+/* >          See LWORK for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
+/* >          optimal performance and returns this value in WORK(1). */
+/* >          If LWORK = -2, the routine calculates minimal size of WORK and */
+/* >          returns this value in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The goal of the interface is to give maximum freedom to the developers for */
+/* > creating any LQ factorization algorithm they wish. The triangular */
+/* > (trapezoidal) L has to be stored in the lower part of A. The upper part of A */
+/* > and the array T can be used to store any relevant information for applying or */
+/* > constructing the Q factor. The WORK array can safely be discarded after exit. */
+/* > */
+/* > Caution: One should not expect the sizes of T and WORK to be the same from one */
+/* > LAPACK implementation to the other, or even from one execution to the other. */
+/* > A workspace query (for T and WORK) is needed at each execution. However, */
+/* > for a given execution, the size of T and WORK are fixed and will not change */
+/* > from one query to the next. */
+/* > */
+/* > \endverbatim */
+/* > */
+/* > \par Further Details particular to this LAPACK implementation: */
+/*  ============================================================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           DLASWLQ or DGELQT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, DGELQ will use either */
+/* >  DLASWLQ (if the matrix is short-and-wide) or DGELQT to compute */
+/* >  the LQ factorization. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgelq_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *t, integer *tsize, doublereal *work, integer *lwork, 
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    logical mint, minw;
+    integer lwmin, lwreq, lwopt, mb, nb, nblcks;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dgelqt_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    logical lminws, lquery;
+    integer mintsz;
+    extern /* Subroutine */ int dlaswlq_(integer *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+/*     DGELQ validates its arguments, fixes the block sizes MB and NB, answers */
+/*     workspace queries, and hands the factorization to DGELQT or DLASWLQ. */
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments (A, T and WORK become indexable from 1) */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/*     TSIZE or LWORK equal to -1 or -2 turns the call into a workspace query */
+    lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2;
+/*     Once either is -2, minimal sizes are reported for each of T, WORK not passed as -1 */
+    mint = FALSE_;
+    minw = FALSE_;
+    if (*tsize == -2 || *lwork == -2) {
+	if (*tsize != -1) {
+	    mint = TRUE_;
+	}
+	if (*lwork != -1) {
+	    minw = TRUE_;
+	}
+    }
+
+/*     Determine the block size */
+/*     (ILAENV is asked only for a non-empty matrix; MB and NB are then clamped) */
+    if (f2cmin(*m,*n) > 0) {
+	mb = ilaenv_(&c__1, "DGELQ ", " ", m, n, &c__1, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	nb = ilaenv_(&c__1, "DGELQ ", " ", m, n, &c__2, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+    } else {
+	mb = 1;
+	nb = *n;
+    }
+    if (mb > f2cmin(*m,*n) || mb < 1) {
+	mb = 1;
+    }
+    if (nb > *n || nb <= *m) {
+	nb = *n;
+    }
+    mintsz = *m + 5;	/* smallest TSIZE accepted outside a query */
+    if (nb > *m && *n > *m) {	/* nblcks = (N-M)/(NB-M), rounded up */
+	if ((*n - *m) % (nb - *m) == 0) {
+	    nblcks = (*n - *m) / (nb - *m);
+	} else {
+	    nblcks = (*n - *m) / (nb - *m) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+/*     Determine if the workspace size satisfies minimal size */
+/*     (the test below is the same one that later selects DGELQT over DLASWLQ) */
+    if (*n <= *m || nb <= *m || nb >= *n) {
+	lwmin = f2cmax(1,*n);
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *n;
+	lwopt = f2cmax(i__1,i__2);
+    } else {
+	lwmin = f2cmax(1,*m);
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m;
+	lwopt = f2cmax(i__1,i__2);
+    }
+    lminws = FALSE_;	/* set when T or WORK only allows the MB = 1 fallback */
+/* Computing MAX */
+    i__1 = 1, i__2 = mb * *m * nblcks + 5;
+    if ((*tsize < f2cmax(i__1,i__2) || *lwork < lwopt) && *lwork >= lwmin && *
+	    tsize >= mintsz && ! lquery) {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2)) {
+	    lminws = TRUE_;
+	    mb = 1;
+	    nb = *n;
+	}
+	if (*lwork < lwopt) {
+	    lminws = TRUE_;
+	    mb = 1;
+	}
+    }
+    if (*n <= *m || nb <= *m || nb >= *n) {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *n;
+	lwreq = f2cmax(i__1,i__2);
+    } else {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m;
+	lwreq = f2cmax(i__1,i__2);
+    }
+/*     INFO = -6 (TSIZE) and -8 (LWORK) are not raised for queries or under the fallback */
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = mb * *m * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
+	    *info = -6;
+	} else if (*lwork < lwreq && ! lquery && ! lminws) {
+	    *info = -8;
+	}
+    }
+/*     With valid arguments T(1:3) and WORK(1) are filled in, for queries as well */
+    if (*info == 0) {
+	if (mint) {
+	    t[1] = (doublereal) mintsz;
+	} else {
+	    t[1] = (doublereal) (mb * *m * nblcks + 5);
+	}
+	t[2] = (doublereal) mb;
+	t[3] = (doublereal) nb;
+	if (minw) {
+	    work[1] = (doublereal) lwmin;
+	} else {
+	    work[1] = (doublereal) lwreq;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELQ", &i__1, (ftnlen)5);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (f2cmin(*m,*n) == 0) {
+	return 0;
+    }
+
+/*     The LQ Decomposition */
+/*     (DGELQT when N <= M or NB lies outside the open range (M,N), else DLASWLQ) */
+    if (*n <= *m || nb <= *m || nb >= *n) {
+	dgelqt_(m, n, &mb, &a[a_offset], lda, &t[6], &mb, &work[1], info);
+    } else {
+	dlaswlq_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &mb, &work[1], 
+		lwork, info);
+    }
+/*     WORK(1) returns the size computed for the MB that was finally used */
+    work[1] = (doublereal) lwreq;
+
+    return 0;
+
+/*     End of DGELQ */
+
+} /* dgelq_ */
+
diff --git a/lapack-netlib/SRC/dgelq2.c b/lapack-netlib/SRC/dgelq2.c
new file mode 100644
index 000000000..19737b2ed
--- /dev/null
+++ b/lapack-netlib/SRC/dgelq2.c
@@ -0,0 +1,597 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* type behind the "integer *" arguments of the translated routines */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;	/* type behind the "doublereal *" array arguments */
+typedef struct { real r, i; } complex;	/* single-precision complex: real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision counterpart */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only, no copy */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast only, no copy */
+#define pCf(z) (*_pCf(z))	/* the struct behind z, accessed as a _Complex float */
+#define pCd(z) (*_pCd(z))	/* the struct behind z, accessed as a _Complex double */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the trailing string-length arguments, e.g. (ftnlen)6 */
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus nvars pointers to Vardesc entries */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* NOTE: the argument is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* NOTE: one argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* NOTE: one argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) characters, stops at a NUL in B, pads nothing */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}	/* NOTE(review): expands to a brace block, not to an expression */
+#define myhuge(w) {HUGE_VAL}	/* NOTE(review): expands to a brace block, not to an expression */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* NOTE(review): brace block without ';'; the dmaxloc_ result is not passed on */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex double result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n.  Positive exponents use binary powering; negative */
+	/* exponents give 1 for x == 1, +1 or -1 for x == -1, and 0 for |x| > 1. */
+	integer result = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return 1;	/* x**0 and 1**n */
+	if (n < 0) {
+		/* x == 0 evaluates 1/x, an integer division by zero; */
+		/* every other base except -1 gives 0. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* base is -1: continue with the positive exponent */
+	}
+	for (bits = n; ; x *= x) {
+		if (bits & 01) result *= x;
+		if (!(bits >>= 1)) break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Position (1 = element s) of the first largest entry among w(s..e), 1-based; n is unused. */
+	double top = w[s-1]; integer at = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { at = k; top = w[k-1]; }	/* strict '>' keeps the earliest maximum */
+	return at - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Position (1 = element s) of the first largest entry among w(s..e), 1-based; n is unused. */
+	float top = w[s-1]; integer at = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { at = k; top = w[k-1]; }	/* strict '>' keeps the earliest maximum */
+	return at - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over n strided elements of conjg(x(i)) * y(i); nothing is read when n <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment starts at the far end: offset (1-n)*inc. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over n strided elements of conjg(x(i)) * y(i); nothing is read when n <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment starts at the far end: offset (1-n)*inc. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over n strided elements of x(i) * y(i), no conjugation; nothing is read when n <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment starts at the far end: offset (1-n)*inc. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over n strided elements of x(i) * y(i), no conjugation; nothing is read when n <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment starts at the far end: offset (1-n)*inc. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked */
+/* > algorithm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELQ2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelq2.f"> */
+/* > */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelq2.f"> */
+/* > */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelq2.f"> */
+/* > */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELQ2 computes an LQ factorization of a real m-by-n matrix A: */
+/* > */
+/* >    A = ( L 0 ) *  Q */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is an n-by-n orthogonal matrix; */
+/* >    L is a lower-triangular m-by-m matrix; */
+/* >    0 is an m-by-(n-m) zero matrix, if m < n. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the m by min(m,n) lower trapezoidal matrix L (L is */
+/* >          lower triangular if m <= n); the elements above the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (M) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* External routines */
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *), dlarfg_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+
+    /* Local variables */
+    integer ld, nrefl, row, next_col, argno, len, nrows, ncols;
+    doublereal *diag;
+    doublereal saved;
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+/*     Unblocked LQ factorization: one elementary reflector per row of A. */
+/*     A, TAU and WORK are addressed from zero in the code below, while the */
+/*     comments keep the one-based notation A(i,j) with i = row + 1. */
+
+/*     Test the input arguments */
+/*     INFO = -1, -2 or -4 flags the first invalid one of M, N and LDA; XERBLA */
+/*     is then called and the routine returns without touching A. */
+
+    *info = (*m < 0) ? -1
+	  : (*n < 0) ? -2
+	  : (*lda < f2cmax(1,*m)) ? -4
+	  : 0;
+    if (*info != 0) {
+	argno = -(*info);
+	xerbla_("DGELQ2", &argno, (ftnlen)6);
+	return 0;
+    }
+
+
+/*     One reflector for each of the first min(M,N) rows; LD is the column */
+/*     stride of A */
+
+    ld = *lda;
+    nrefl = f2cmin(*m,*n);
+    for (row = 0; row < nrefl; ++row) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
+
+	diag = a + row + row * ld;
+	len = *n - row;
+/*        The rest of the row starts in column min(i+1,n) */
+	next_col = f2cmin(row + 2,*n) - 1;
+	dlarfg_(&len, diag, a + row + next_col * ld, lda, tau + row);
+	if (row + 1 < *m) {
+
+/*           Apply H(i) to A(i+1:m,i:n) from the right */
+
+/*           A(i,i) is set to 1 for the duration of the call, so that */
+/*           A(i,i:n) holds the complete reflector vector, and is */
+/*           restored afterwards */
+
+	    saved = *diag;
+	    *diag = 1.;
+	    nrows = *m - row - 1;
+	    ncols = *n - row;
+	    dlarf_("Right", &nrows, &ncols, diag, lda, tau + row, diag + 1, 
+		    lda, work);
+	    *diag = saved;
+	}
+    }
+
+    return 0;
+
+/*     End of DGELQ2 */
+
+} /* dgelq2_ */
+
diff --git a/lapack-netlib/SRC/dgelqf.c b/lapack-netlib/SRC/dgelqf.c
new file mode 100644
index 000000000..00e2f1f19
--- /dev/null
+++ b/lapack-netlib/SRC/dgelqf.c
@@ -0,0 +1,700 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* type behind the "integer *" arguments of the translated routines */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;	/* type behind the "doublereal *" array arguments */
+typedef struct { real r, i; } complex;	/* single-precision complex: real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision counterpart */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only, no copy */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast only, no copy */
+#define pCf(z) (*_pCf(z))	/* the struct behind z, accessed as a _Complex float */
+#define pCd(z) (*_pCd(z))	/* the struct behind z, accessed as a _Complex double */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the trailing string-length arguments, e.g. (ftnlen)6 */
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus nvars pointers to Vardesc entries */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* NOTE: the argument is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* NOTE: one argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* NOTE: one argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) characters, stops at a NUL in B, pads nothing */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}	/* NOTE(review): expands to a brace block, not to an expression */
+#define myhuge(w) {HUGE_VAL}	/* NOTE(review): expands to a brace block, not to an expression */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* NOTE(review): brace block without ';'; the dmaxloc_ result is not passed on */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	float result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	double result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex float result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Return x**n by binary (square-and-multiply) powering; n == 0 yields 1. */
+	/* |n| is formed in unsigned arithmetic: "n = -n" overflows for the most negative n. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	_Complex double result = 1.0;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal to |n| */
+	for(;; x *= x) {	/* x runs through base**1, base**2, base**4, ... */
+		if(bits & 01) result *= x;	/* this bit of the exponent is set */
+		if(!(bits >>= 1)) break;	/* leave before squaring once no bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n.  Positive exponents use binary powering; negative */
+	/* exponents give 1 for x == 1, +1 or -1 for x == -1, and 0 for |x| > 1. */
+	integer result = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return 1;	/* x**0 and 1**n */
+	if (n < 0) {
+		/* x == 0 evaluates 1/x, an integer division by zero; */
+		/* every other base except -1 gives 0. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* base is -1: continue with the positive exponent */
+	}
+	for (bits = n; ; x *= x) {
+		if (bits & 01) result *= x;
+		if (!(bits >>= 1)) break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Position (1 = element s) of the first largest entry among w(s..e), 1-based; n is unused. */
+	double top = w[s-1]; integer at = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { at = k; top = w[k-1]; }	/* strict '>' keeps the earliest maximum */
+	return at - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Position (1 = element s) of the first largest entry among w(s..e), 1-based; n is unused. */
+	float top = w[s-1]; integer at = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { at = k; top = w[k-1]; }	/* strict '>' keeps the earliest maximum */
+	return at - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over n strided elements of conjg(x(i)) * y(i); nothing is read when n <= 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative increment starts at the far end: offset (1-n)*inc. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* z = sum over i of conjg(x_i) * y_i for n elements taken with strides incx / incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = 0, iy = 0;
+	_Complex double acc = 0.0;
+	/* BLAS convention: with a negative stride the vector is walked backwards from its far end, */
+	/* so indexing starts at (1-n)*inc instead of reading in front of the array. */
+	if (n > 0 && incx < 0) ix = (1 - n) * incx;
+	if (n > 0 && incy < 0) iy = (1 - n) * incy;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+/* z = sum over i of x_i * y_i (no conjugation) for n elements taken with strides incx / incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = 0, iy = 0;
+	_Complex float acc = 0.0;
+	/* BLAS convention: with a negative stride the vector is walked backwards from its far end, */
+	/* so indexing starts at (1-n)*inc instead of reading in front of the array. */
+	if (n > 0 && incx < 0) ix = (1 - n) * incx;
+	if (n > 0 && incy < 0) iy = (1 - n) * incy;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* z = sum over i of x_i * y_i (no conjugation) for n elements taken with strides incx / incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = 0, iy = 0;
+	_Complex double acc = 0.0;
+	/* BLAS convention: with a negative stride the vector is walked backwards from its far end, */
+	/* so indexing starts at (1-n)*inc instead of reading in front of the array. */
+	if (n > 0 && incx < 0) ix = (1 - n) * incx;
+	if (n > 0 && incy < 0) iy = (1 - n) * incy;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;	/* first ILAENV argument when asking for the block size NB */
+static integer c_n1 = -1;	/* passed for the last two integer arguments of every ILAENV call */
+static integer c__3 = 3;	/* first ILAENV argument when asking for the crossover point NX */
+static integer c__2 = 2;	/* first ILAENV argument when asking for the minimum block size NBMIN */
+
+/* > \brief \b DGELQF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELQF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqf.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqf.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqf.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELQF computes an LQ factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = ( L 0 ) *  Q */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a N-by-N orthogonal matrix; */
+/* >    L is a lower-triangular M-by-M matrix; */
+/* >    0 is a M-by-(N-M) zero matrix, if M < N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the m-by-min(m,n) lower trapezoidal matrix L (L is */
+/* >          lower triangular if m <= n); the elements above the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,M). */
+/* >          For optimum performance LWORK >= M*NB, where NB is the */
+/* >          optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
+{
+/*  Blocked LQ factorization  A = ( L 0 ) * Q  of the real M-by-N matrix A. */
+/*  m, n   (in)      row and column counts of A, both >= 0 */
+/*  a      (in/out)  column-major array with leading dimension *lda; */
+/*                   receives L on and below the diagonal and the */
+/*                   reflector vectors above it */
+/*  lda    (in)      leading dimension of a, >= max(1,M) */
+/*  tau    (out)     scalar factors of the min(M,N) reflectors */
+/*  work   (out)     workspace; its first entry carries a size on return */
+/*  lwork  (in)      length of work, >= max(1,M), or -1 for a size query */
+/*  info   (out)     0 on success, -i if argument i was rejected */
+/*  Panels of up to NB rows are factored by DGELQ2; DLARFT builds each */
+/*  panel's triangular factor and DLARFB applies the block reflector to */
+/*  the rows below.  Whatever is left over goes to DGELQ2 directly. */
+
+    /* External routines */
+    extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+
+    /* Local variables */
+    integer row;	/* 1-based index of the first row not yet factored */
+    integer rank;	/* min(M,N), the number of reflectors */
+    integer blk;	/* block size NB currently in force */
+    integer blk_min;	/* smallest block size worth using (NBMIN) */
+    integer cross;	/* crossover point NX */
+    integer panel;	/* number of rows in the current panel (IB) */
+    integer ld_work;	/* leading dimension handed to DLARFT / DLARFB */
+    integer ws_used;	/* workspace size stored in WORK(1) on exit */
+    integer rows_arg, cols_arg, neg_info, sub_info;
+    integer ld = *lda;
+    logical size_query;
+    doublereal *corner, *tau_row;	/* addresses of A(row,row) and TAU(row) */
+
+/*  -- LAPACK computational routine (version 3.9.0), November 2019 -- */
+
+/*     Ask ILAENV for the block size and publish the optimal workspace */
+
+    *info = 0;
+    blk = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    work[0] = (doublereal) (*m * blk);
+    size_query = *lwork == -1;
+
+/*     Test the input arguments; the first failing test decides INFO */
+
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*lwork < f2cmax(1,*m) && ! size_query) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	neg_info = -(*info);
+	xerbla_("DGELQF", &neg_info, (ftnlen)6);
+	return 0;
+    }
+    if (size_query) return 0;
+
+/*     Quick return if possible */
+
+    rank = f2cmin(*m,*n);
+    if (rank == 0) {
+	work[0] = 1.;
+	return 0;
+    }
+
+/*     Start from the settings of the unblocked algorithm */
+
+    blk_min = 2;
+    cross = 0;
+    ws_used = *m;
+    if (blk > 1 && blk < rank) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+	cross = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	cross = f2cmax(0,cross);
+	if (cross < rank) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ld_work = *m;
+	    ws_used = ld_work * blk;
+	    if (*lwork < ws_used) {
+
+/*              Too little workspace for the optimal NB: shrink NB to */
+/*              what fits and look up the minimum useful NB. */
+
+		blk = *lwork / ld_work;
+		blk_min = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		blk_min = f2cmax(2,blk_min);
+	    }
+	}
+    }
+
+    row = 1;
+    if (blk >= blk_min && blk < rank && cross < rank) {
+
+/*        Use blocked code initially */
+
+	while (row <= rank - cross) {
+	    panel = f2cmin(rank - row + 1,blk);
+	    corner = a + (row - 1) + (row - 1) * ld;
+	    tau_row = tau + (row - 1);
+
+/*           Compute the LQ factorization of the current block */
+/*           A(i:i+ib-1,i:n) */
+
+	    cols_arg = *n - row + 1;
+	    dgelq2_(&panel, &cols_arg, corner, lda, tau_row, work, &sub_info);
+	    if (row + panel <= *m) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
+
+		cols_arg = *n - row + 1;
+		dlarft_("Forward", "Rowwise", &cols_arg, &panel, corner, lda, 
+			tau_row, work, &ld_work);
+
+/*              Apply H to A(i+ib:m,i:n) from the right */
+
+		rows_arg = *m - row - panel + 1;
+		cols_arg = *n - row + 1;
+		dlarfb_("Right", "No transpose", "Forward", "Rowwise", 
+			&rows_arg, &cols_arg, &panel, corner, lda, work, 
+			&ld_work, corner + panel, lda, work + panel, &ld_work);
+	    }
+	    row += blk;
+	}
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (row <= rank) {
+	rows_arg = *m - row + 1;
+	cols_arg = *n - row + 1;
+	dgelq2_(&rows_arg, &cols_arg, a + (row - 1) + (row - 1) * ld, lda, 
+		tau + (row - 1), work, &sub_info);
+    }
+
+    work[0] = (doublereal) ws_used;
+    return 0;
+
+/*     End of DGELQF */
+
+} /* dgelqf_ */
+
diff --git a/lapack-netlib/SRC/dgelqt.c b/lapack-netlib/SRC/dgelqt.c
new file mode 100644
index 000000000..03035edd8
--- /dev/null
+++ b/lapack-netlib/SRC/dgelqt.c
@@ -0,0 +1,621 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* C type behind the INTEGER arguments of the translated routines */
+typedef unsigned int uinteger;	/* unsigned type used by the bit_clear / bit_set macros */
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;	/* C type behind the DOUBLE PRECISION arguments of the translated routines */
+typedef struct { real r, i; } complex;	/* single-precision complex: real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex: real part r, imaginary part i */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a _Complex float */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a _Complex double */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage seen as _Complex float; NOTE(review): assumes both layouts agree */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage seen as _Complex double; NOTE(review): assumes both layouts agree */
+#define pCf(z) (*_pCf(z))	/* lvalue for storing a _Complex float into *z */
+#define pCd(z) (*_pCd(z))	/* lvalue for storing a _Complex double into *z */
+typedef int logical;	/* truth value; TRUE_ and FALSE_ are defined just below */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: scalar types plus one argument record per kind of I/O statement */
+
+typedef int flag;
+typedef int ftnlen;	/* type of the length arguments written as (ftnlen)N at call sites */
+typedef int ftnint;
+
+/* cilist: arguments of an external read or write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: arguments of an internal read or write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: arguments of an open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: arguments of a close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: arguments shared by rewind, backspace and endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: arguments of an inquire; most character results come with a length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes a single variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of Vardesc pointers; presumably nvars counts the entries of vars -- confirm */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* expands to a complete break statement, semicolon included */
+#define mycycle() continue;	/* expands to a complete continue statement, semicolon included */
+#define myceiling(w) (ceil(w))	/* parenthesised rather than braced so the value can be used inside an expression */
+#define myhuge(w) (HUGE_VAL)	/* the argument is ignored */
+/* mymaxloc: offset of the largest entry of w(*s..*e); always dispatches to the double-precision dmaxloc_. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by repeated squaring (negative n uses 1/x); |n| is formed in unsigned arithmetic so the most negative n cannot overflow. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0; unsigned long int u;
+	if (n == 0) return acc;
+	if (n < 0) { u = 0UL - (unsigned long int)n; x = 1/x; } else u = (unsigned long int)n;
+	for (;;) {
+		if (u & 01) acc *= x;
+		if ((u >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+/* x**n by repeated squaring (negative n uses 1/x); |n| is formed in unsigned arithmetic so the most negative n cannot overflow. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0; unsigned long int u;
+	if (n == 0) return acc;
+	if (n < 0) { u = 0UL - (unsigned long int)n; x = 1/x; } else u = (unsigned long int)n;
+	for (;;) {
+		if (u & 01) acc *= x;
+		if ((u >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+/* Complex x**n by repeated squaring (negative n uses 1/x); |n| is formed in unsigned arithmetic so the most negative n cannot overflow. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0; unsigned long int u;
+	if (n == 0) return acc;
+	if (n < 0) { u = 0UL - (unsigned long int)n; x = 1/x; } else u = (unsigned long int)n;
+	for (;;) {
+		if (u & 01) acc *= x;
+		if ((u >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+/* Complex x**n by repeated squaring (negative n uses 1/x); |n| is formed in unsigned arithmetic so the most negative n cannot overflow. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0; unsigned long int u;
+	if (n == 0) return acc;
+	if (n < 0) { u = 0UL - (unsigned long int)n; x = 1/x; } else u = (unsigned long int)n;
+	for (;;) {
+		if (u & 01) acc *= x;
+		if ((u >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+/* Integer power x**n by repeated squaring; n == 0 or x == 1 gives 1, and a negative n has an integer result only for x == -1. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return acc;
+	if (n < 0) {
+		/* |x| > 1 truncates to 0; x == 0 still evaluates 1/x, exactly as the original expression did. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* (-1)**n equals (-1)**(-n) */
+	}
+	bits = n;
+	for (;;) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+/* Offset (1-based, relative to s) of the first largest entry in w(s..e), w indexed from 1; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k; double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+/* Offset (1-based, relative to s) of the first largest entry in w(s..e), w indexed from 1; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k; float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+/* z = sum over i of conjg(x_i) * y_i for n elements taken with strides incx / incy. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = 0, iy = 0;
+	_Complex float acc = 0.0;
+	/* BLAS convention: with a negative stride the vector is walked backwards from its far end, */
+	/* so indexing starts at (1-n)*inc instead of reading in front of the array. */
+	if (n > 0 && incx < 0) ix = (1 - n) * incx;
+	if (n > 0 && incy < 0) iy = (1 - n) * incy;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* z = sum over i of conjg(x_i) * y_i for n elements taken with strides incx / incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = 0, iy = 0;
+	_Complex double acc = 0.0;
+	/* BLAS convention: with a negative stride the vector is walked backwards from its far end, */
+	/* so indexing starts at (1-n)*inc instead of reading in front of the array. */
+	if (n > 0 && incx < 0) ix = (1 - n) * incx;
+	if (n > 0 && incy < 0) iy = (1 - n) * incy;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+/* z = sum over i of x_i * y_i (no conjugation) for n elements taken with strides incx / incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = 0, iy = 0;
+	_Complex float acc = 0.0;
+	/* BLAS convention: with a negative stride the vector is walked backwards from its far end, */
+	/* so indexing starts at (1-n)*inc instead of reading in front of the array. */
+	if (n > 0 && incx < 0) ix = (1 - n) * incx;
+	if (n > 0 && incy < 0) iy = (1 - n) * incy;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* z = sum over i of x_i * y_i (no conjugation) for n elements taken with strides incx / incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = 0, iy = 0;
+	_Complex double acc = 0.0;
+	/* BLAS convention: with a negative stride the vector is walked backwards from its far end, */
+	/* so indexing starts at (1-n)*inc instead of reading in front of the array. */
+	if (n > 0 && incx < 0) ix = (1 - n) * incx;
+	if (n > 0 && incy < 0) iy = (1 - n) * incy;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGELQT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELQT + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqt.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqt.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqt.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) */
+
+/*       INTEGER INFO, LDA, LDT, M, N, MB */
+/*       DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELQT computes a blocked LQ factorization of a real M-by-N matrix A */
+/* > using the compact WY representation of Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MB */
+/* > \verbatim */
+/* >          MB is INTEGER */
+/* >          The block size to be used in the blocked LQ.  MIN(M,N) >= MB >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and below the diagonal of the array */
+/* >          contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is */
+/* >          lower triangular if M <= N); the elements above the diagonal */
+/* >          are the rows of V. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) */
+/* >          The upper triangular block reflectors stored in compact form */
+/* >          as a sequence of upper triangular blocks.  See below */
+/* >          for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= MB. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MB*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
+/* >  above the diagonal. For example, if M=3 and N=5, the matrix V is */
+/* > */
+/* >               V = (  1  v1 v1 v1 v1 ) */
+/* >                   (     1  v2 v2 v2 ) */
+/* >                   (         1 v3 v3 ) */
+/* > */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
+/* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/MB), where each */
+/* >  block is of order MB except for the last block, which is of order */
+/* >  IB = K - (B-1)*MB.  For each of the B blocks, an upper triangular block */
+/* >  reflector factor is computed: T1, T2, ..., TB.  The MB-by-MB (and IB-by-IB */
+/* >  for the last block) T's are stored in the MB-by-K matrix T as */
+/* > */
+/* >               T = (T1 T2 ... TB). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgelqt_(integer *m, integer *n, integer *mb, doublereal *
+	a, integer *lda, doublereal *t, integer *ldt, doublereal *work, 
+	integer *info)
+{
+/*  Blocked LQ factorization of the real M-by-N matrix A, Q in compact WY form. */
+/*  */
+/*  m, n  (in)      row and column counts of A, both >= 0 */
+/*  mb    (in)      block size; >= 1, and <= min(M,N) unless min(M,N) is 0 */
+/*  a     (in/out)  column-major array, leading dimension *lda >= max(1,M) */
+/*  t     (out)     column-major array, leading dimension *ldt >= MB; the */
+/*                  panel that starts at row i writes its factor starting */
+/*                  at T(1,i) */
+/*  work  (out)     scratch space passed on to DLARFB */
+/*  info  (out)     0 on success, -i if argument i was rejected */
+
+    /* External routines */
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern /* Subroutine */ int dgelqt3_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+
+    /* Local variables */
+    integer row;	/* 1-based index of the first row of the current panel */
+    integer rank;	/* min(M,N) */
+    integer step;	/* distance between panel starts (MB) */
+    integer panel;	/* number of rows in the current panel (IB) */
+    integer rows_below, cols_right, ld_scratch, neg_info, sub_info;
+    integer ld_a = *lda, ld_t = *ldt;
+    doublereal *corner;	/* address of A(row,row) */
+    doublereal *t_blk;	/* address of T(1,row) */
+
+/*  -- LAPACK computational routine (version 3.8.0), November 2017 -- */
+
+/*     Test the input arguments; the first failing test decides INFO */
+
+    *info = 0;
+    rank = f2cmin(*m,*n);
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*mb < 1 || (*mb > rank && rank > 0)) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldt < *mb) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	neg_info = -(*info);
+	xerbla_("DGELQT", &neg_info, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (rank == 0) {
+	return 0;
+    }
+
+/*     Blocked loop of length K */
+
+    step = *mb;
+    for (row = 1; row <= rank; row += step) {
+	panel = f2cmin(rank - row + 1,step);
+	corner = a + (row - 1) + (row - 1) * ld_a;
+	t_blk = t + (row - 1) * ld_t;
+
+/*        Factor the panel A(I:I+IB-1,I:N); DGELQT3 is handed T(1,I) as */
+/*        the place for the panel's triangular factor */
+
+	cols_right = *n - row + 1;
+	dgelqt3_(&panel, &cols_right, corner, lda, t_blk, ldt, &sub_info);
+	if (row + panel <= *m) {
+
+/*           Apply the panel's block reflector from the right to the */
+/*           rows below it, A(I+IB:M,I:N) */
+
+	    rows_below = *m - row - panel + 1;
+	    cols_right = *n - row + 1;
+	    ld_scratch = *m - row - panel + 1;
+	    dlarfb_("R", "N", "F", "R", &rows_below, &cols_right, &panel, 
+		    corner, lda, t_blk, ldt, corner + panel, lda, work, 
+		    &ld_scratch);
+	}
+    }
+    return 0;
+
+/*     End of DGELQT */
+
+} /* dgelqt_ */
+
diff --git a/lapack-netlib/SRC/dgelqt3.c b/lapack-netlib/SRC/dgelqt3.c
new file mode 100644
index 000000000..ae8bc0b71
--- /dev/null
+++ b/lapack-netlib/SRC/dgelqt3.c
@@ -0,0 +1,679 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* integer power x**n, computed entirely in integer arithmetic */
+	integer pow; unsigned long int u;
+	if (n <= 0) { /* non-positive exponent: the result is decided by x alone unless x == -1 */
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* x == 0 evaluates 1/x (division by zero); any other x here gives 0 */
+		else n = -n; /* x == -1: fall through to the loop below with the negated exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) { /* repeated squaring over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]; n is not used */
+	integer best = s, k; double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]; n is not used */
+	integer best = s, k; float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z = sum over n elements of conj(x(i)) * y(i); the result is stored through z rather than returned */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z = sum over n elements of conj(x(i)) * y(i); the result is stored through z rather than returned */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z = sum over n elements of x(i) * y(i), without conjugation; the result is stored through z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z = sum over n elements of x(i) * y(i), without conjugation; the result is stored through z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static doublereal c_b7 = 1.;
+static doublereal c_b19 = -1.;
+
+/* > \brief \b DGELQT3 recursively computes a LQ factorization of a general real or complex matrix using the c
+ompact WY representation of Q. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELQT3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelqt3
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelqt3
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelqt3
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*        SUBROUTINE DGELQT3( M, N, A, LDA, T, LDT, INFO ) */
+
+/*       INTEGER   INFO, LDA, M, N, LDT */
+/*       DOUBLE PRECISION   A( LDA, * ), T( LDT, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELQT3 recursively computes a LQ factorization of a real M-by-N */
+/* > matrix A, using the compact WY representation of Q. */
+/* > */
+/* > Based on the algorithm of Elmroth and Gustavson, */
+/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  0 <= M <= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
+/* >          below the diagonal contain the M-by-M lower triangular matrix L; the */
+/* >          elements above the diagonal are the rows of V.  See below for */
+/* >          further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,N) */
+/* >          The M-by-M upper triangular factor of the block reflector. */
+/* >          The elements on and above the diagonal contain the block */
+/* >          reflector T; the elements below the diagonal are not used. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th row */
+/* >  above the diagonal. For example, if M=3 and N=5, the matrix V is */
+/* > */
+/* >               V = (  1  v1 v1 v1 v1 ) */
+/* >                   (     1  v2 v2 v2 ) */
+/* >                   (        1  v3 v3 ) */
+/* > */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
+/* >  block reflector H is then given by */
+/* > */
+/* >               H = I - V * T * V**T */
+/* > */
+/* >  where V**T is the transpose of V. */
+/* > */
+/* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgelqt3_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *t, integer *ldt, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2;
+
+    /* Local variables */
+    integer i__, j;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer iinfo;
+    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer i1, j1, m1, m2;
+    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
+	     integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+/*  Recursive step: factor the first M/2 rows, apply that block reflector to */
+/*  the remaining rows, factor those, then form the off-diagonal block of T. */
+/*  ===================================================================== */
+
+/*  INFO = -i flags argument i; IINFO from the recursive calls is not inspected. */
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*ldt < f2cmax(1,*m)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELQT3", &i__1, (ftnlen)7);
+	return 0;
+    }
+/*     Quick return: with M = 0 the split below gives M1 = 0 and recurses forever */
+    if (*m == 0) {
+	return 0;
+    } else if (*m == 1) {
+/*        Single row (M=1): one Householder reflector; DLARFG's last argument is T(1,1) */
+	dlarfg_(n, &a[a_offset], &a[f2cmin(2,*n) * a_dim1 + 1], lda, &t[t_offset]
+		);
+
+    } else {
+
+/*        Otherwise, split A into blocks... */
+
+	m1 = *m / 2;
+	m2 = *m - m1;
+/* Computing MIN */
+	i__1 = m1 + 1;
+	i1 = f2cmin(i__1,*m);
+/* Computing MIN */
+	i__1 = *m + 1;
+	j1 = f2cmin(i__1,*n);
+
+/*        Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */
+
+	dgelqt3_(&m1, n, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);
+
+/*        Compute A(J1:M,1:N) = Q1^H A(J1:M,1:N) [workspace: T(1:N1,J1:N)] */
+
+	i__1 = m2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = m1;
+	    for (j = 1; j <= i__2; ++j) {
+		t[i__ + m1 + j * t_dim1] = a[i__ + m1 + j * a_dim1];
+	    }
+	}
+	dtrmm_("R", "U", "T", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
+		+ t_dim1], ldt);
+
+	i__1 = *n - m1;
+	dgemm_("N", "T", &m2, &m1, &i__1, &c_b7, &a[i1 + i1 * a_dim1], lda, &
+		a[i1 * a_dim1 + 1], lda, &c_b7, &t[i1 + t_dim1], ldt);
+
+	dtrmm_("R", "U", "N", "N", &m2, &m1, &c_b7, &t[t_offset], ldt, &t[i1 
+		+ t_dim1], ldt);
+
+	i__1 = *n - m1;
+	dgemm_("N", "N", &m2, &i__1, &m1, &c_b19, &t[i1 + t_dim1], ldt, &a[i1 
+		* a_dim1 + 1], lda, &c_b7, &a[i1 + i1 * a_dim1], lda);
+
+	dtrmm_("R", "U", "N", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 
+		+ t_dim1], ldt);
+
+	i__1 = m2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = m1;
+	    for (j = 1; j <= i__2; ++j) {
+		a[i__ + m1 + j * a_dim1] -= t[i__ + m1 + j * t_dim1];
+		t[i__ + m1 + j * t_dim1] = 0.;
+	    }
+	}
+
+/*        Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */
+
+	i__1 = *n - m1;
+	dgelqt3_(&m2, &i__1, &a[i1 + i1 * a_dim1], lda, &t[i1 + i1 * t_dim1], 
+		ldt, &iinfo);
+
+/*        Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 */
+
+	i__1 = m2;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = m1;
+	    for (j = 1; j <= i__2; ++j) {
+		t[j + (i__ + m1) * t_dim1] = a[j + (i__ + m1) * a_dim1];
+	    }
+	}
+
+	dtrmm_("R", "U", "T", "U", &m1, &m2, &c_b7, &a[i1 + i1 * a_dim1], lda,
+		 &t[i1 * t_dim1 + 1], ldt);
+
+	i__1 = *n - *m;
+	dgemm_("N", "T", &m1, &m2, &i__1, &c_b7, &a[j1 * a_dim1 + 1], lda, &a[
+		i1 + j1 * a_dim1], lda, &c_b7, &t[i1 * t_dim1 + 1], ldt);
+
+	dtrmm_("L", "U", "N", "N", &m1, &m2, &c_b19, &t[t_offset], ldt, &t[i1 
+		* t_dim1 + 1], ldt);
+
+	dtrmm_("R", "U", "N", "N", &m1, &m2, &c_b7, &t[i1 + i1 * t_dim1], ldt,
+		 &t[i1 * t_dim1 + 1], ldt);
+
+
+
+/*        Y = (Y1,Y2); L = [ L1            0  ];  T = [T1 T3] */
+/*                         [ A(1:N1,J1:N)  L2 ]       [ 0 T2] */
+
+    }
+
+    return 0;
+
+/*     End of DGELQT3 */
+
+} /* dgelqt3_ */
+
diff --git a/lapack-netlib/SRC/dgels.c b/lapack-netlib/SRC/dgels.c
new file mode 100644
index 000000000..34fffd2ba
--- /dev/null
+++ b/lapack-netlib/SRC/dgels.c
@@ -0,0 +1,956 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) u = 0UL - u, x = 1/x; /* |n| taken unsigned: -n would overflow for the most negative n */
+		for( ; ; ) {
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* integer power x**n, computed entirely in integer arithmetic */
+	integer pow; unsigned long int u;
+	if (n <= 0) { /* non-positive exponent: the result is decided by x alone unless x == -1 */
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* x == 0 evaluates 1/x (division by zero); any other x here gives 0 */
+		else n = -n; /* x == -1: fall through to the loop below with the negated exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
+		u = n;
+		for(pow = 1; ; ) { /* repeated squaring over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]; n is not used */
+	integer best = s, k; double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]; n is not used */
+	integer best = s, k; float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z = sum over n elements of conj(x(i)) * y(i); the result is stored through z rather than returned */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z = sum over n elements of conj(x(i)) * y(i); the result is stored through z rather than returned */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z = sum over n elements of x(i) * y(i), without conjugation; the result is stored through z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z = sum over n elements of x(i) * y(i), without conjugation; the result is stored through z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards from its last element */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static doublereal c_b33 = 0.;
+static integer c__0 = 0;
+
+/* > \brief <b> DGELS solves overdetermined or underdetermined systems for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgels.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgels.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgels.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */
+/*                         INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
+/*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELS solves overdetermined or underdetermined real linear systems */
+/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */
+/* > factorization of A.  It is assumed that A has full rank. */
+/* > */
+/* > The following options are provided: */
+/* > */
+/* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A*X ||. */
+/* > */
+/* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
+/* >    an underdetermined system A * X = B. */
+/* > */
+/* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
+/* >    an underdetermined system A**T * X = B. */
+/* > */
+/* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A**T * X ||. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
+/* > matrix X. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N': the linear system involves A; */
+/* >          = 'T': the linear system involves A**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of */
+/* >          columns of the matrices B and X. NRHS >=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >            if M >= N, A is overwritten by details of its QR */
+/* >                       factorization as returned by DGEQRF; */
+/* >            if M <  N, A is overwritten by details of its LQ */
+/* >                       factorization as returned by DGELQF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the matrix B of right hand side vectors, stored */
+/* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
+/* >          if TRANS = 'T'. */
+/* >          On exit, if INFO = 0, B is overwritten by the solution */
+/* >          vectors, stored columnwise: */
+/* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
+/* >          squares solution vectors; the residual sum of squares for the */
+/* >          solution in each column is given by the sum of squares of */
+/* >          elements N+1 to M in that column; */
+/* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
+/* >          least squares solution vectors; the residual sum of squares */
+/* >          for the solution in each column is given by the sum of */
+/* >          squares of elements M+1 to N in that column. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          LWORK >= max( 1, MN + max( MN, NRHS ) ). */
+/* >          For optimal performance, */
+/* >          LWORK >= max( 1, MN + max( MN, NRHS )*NB ). */
+/* >          where MN = min(M,N) and NB is the optimum block size. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO =  i, the i-th diagonal element of the */
+/* >                triangular factor of A is zero, so that A does not have */
+/* >                full rank; the least squares solution could not be */
+/* >                computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgels_(char *trans, integer *m, integer *n, integer *
+	nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, 
+	doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    doublereal anrm, bnrm;
+    integer brow;
+    logical tpsd;
+    integer i__, j, iascl, ibscl;
+    extern logical lsame_(char *, char *);
+    integer wsize;
+    doublereal rwork[1];
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
+    integer nb;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    integer mn;
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    integer *, integer *, doublereal *, integer *, integer *),
+	     dgeqrf_(integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *), dlaset_(char *,
+	     integer *, integer *, doublereal *, doublereal *, doublereal *, 
+	    integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer scllen;
+    doublereal bignum;
+    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), 
+	    dormqr_(char *, char *, integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *);
+    doublereal smlnum;
+    logical lquery;
+    extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*  Flow: validate the arguments, size the workspace, rescale A and B if needed, */
+/*  factor A by QR (M >= N) or LQ (M < N), solve in place in B, then undo the scaling. */
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments: shift the pointers so that the 1-based indices used below address the caller's arrays */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    mn = f2cmin(*m,*n);
+    lquery = *lwork == -1;
+    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m);
+	if (*ldb < f2cmax(i__1,*n)) {
+	    *info = -8;
+	} else /* if(complicated condition) */ {
+/* Computing MAX */
+	    i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs);
+	    if (*lwork < f2cmax(i__1,i__2) && ! lquery) {
+		*info = -10;
+	    }
+	}
+    }
+
+/*     Figure out optimal block size */
+/*     (also done when only LWORK is too small, so WORK(1) still carries the optimal size) */
+    if (*info == 0 || *info == -10) {
+/*        TPSD is true exactly when TRANS is not 'N' */
+	tpsd = TRUE_;
+	if (lsame_(trans, "N")) {
+	    tpsd = FALSE_;
+	}
+
+	if (*m >= *n) {
+	    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    if (tpsd) {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    }
+	} else {
+	    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    if (tpsd) {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    } else {
+/* Computing MAX */
+		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, &
+			c_n1, (ftnlen)6, (ftnlen)2);
+		nb = f2cmax(i__1,i__2);
+	    }
+	}
+
+/* Computing MAX */
+	i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs) * nb;
+	wsize = f2cmax(i__1,i__2);
+	work[1] = (doublereal) wsize;
+
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELS ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+/*     (min(M,N,NRHS) = 0: the leading max(M,N)-by-NRHS part of B is set to zero) */
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*nrhs) == 0) {
+	i__1 = f2cmax(*m,*n);
+	dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = dlamch_("S") / dlamch_("P");
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Scale A, B if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, rwork);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = f2cmax(*m,*n);
+	dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
+	goto L50;
+    }
+
+    brow = *m;
+    if (tpsd) {
+	brow = *n;
+    }
+    bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 2;
+    }
+
+    if (*m >= *n) {
+
+/*        compute QR factorization of A */
+
+	i__1 = *lwork - mn;
+	dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
+		;
+
+/*        workspace at least N, optimally N*NB */
+
+	if (! tpsd) {
+
+/*           Least-Squares Problem min || A * X - B || */
+
+/*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[
+		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+/*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */
+
+	    dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset]
+		    , lda, &b[b_offset], ldb, info);
+/*           INFO > 0 from dtrtrs_: return at once, before the scaling of B is undone */
+	    if (*info > 0) {
+		return 0;
+	    }
+
+	    scllen = *n;
+
+	} else {
+
+/*           Underdetermined system of equations A**T * X = B */
+
+/*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */
+
+	    dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], 
+		    lda, &b[b_offset], ldb, info);
+/*           INFO > 0 from dtrtrs_: return at once, before the scaling of B is undone */
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(N+1:M,1:NRHS) = ZERO */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = *n + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.;
+/* L10: */
+		}
+/* L20: */
+	    }
+
+/*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, &
+		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+	    scllen = *m;
+
+	}
+
+    } else {
+
+/*        Compute LQ factorization of A */
+
+	i__1 = *lwork - mn;
+	dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
+		;
+
+/*        workspace at least M, optimally M*NB. */
+
+	if (! tpsd) {
+
+/*           underdetermined system of equations A * X = B */
+
+/*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */
+
+	    dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset]
+		    , lda, &b[b_offset], ldb, info);
+/*           INFO > 0 from dtrtrs_: return at once, before the scaling of B is undone */
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(M+1:N,1:NRHS) = 0 */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = *m + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.;
+/* L30: */
+		}
+/* L40: */
+	    }
+
+/*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[
+		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+	    scllen = *n;
+
+	} else {
+
+/*           overdetermined system min || A**T * X - B || */
+
+/*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */
+
+	    i__1 = *lwork - mn;
+	    dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, &
+		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+/*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */
+
+	    dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
+		    lda, &b[b_offset], ldb, info);
+/*           INFO > 0 from dtrtrs_: return at once, before the scaling of B is undone */
+	    if (*info > 0) {
+		return 0;
+	    }
+
+	    scllen = *m;
+
+	}
+
+    }
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (iascl == 2) {
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+    if (ibscl == 1) {
+	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (ibscl == 2) {
+	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+/*     WORK(1) was used as scratch by the factorization; store the optimal size again */
+L50:
+    work[1] = (doublereal) wsize;
+
+    return 0;
+
+/*     End of DGELS */
+
+} /* dgels_ */
+
diff --git a/lapack-netlib/SRC/dgelsd.c b/lapack-netlib/SRC/dgelsd.c
new file mode 100644
index 000000000..ee230d054
--- /dev/null
+++ b/lapack-netlib/SRC/dgelsd.c
@@ -0,0 +1,1153 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* (r, i) struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* (r, i) struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* view the struct's storage as a C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* view the struct's storage as a C99 complex */
+#define pCf(z) (*_pCf(z))	/* dereferenced _pCf, usable as an assignment target */
+#define pCd(z) (*_pCd(z))	/* dereferenced _pCd, usable as an assignment target */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* *R = (*A)**(*B); R, A and B are all doublecomplex pointers */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w)) /* parenthesised (not braced) so it is an ordinary expression */
+#define myhuge(w) (HUGE_VAL) /* the argument is ignored */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n))) /* s and e arrive as pointers and are passed on by value */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0;
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* x**n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* Complex x**n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* Complex x**n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic because -n overflows when n is the most negative integer */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* Integer power x**n; non-positive exponents are settled up front, the rest by square-and-multiply. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int bits;
+	if (n <= 0) {
+		/* x**0 and 1**n give 1; any base other than -1 gives 0, except base 0 which divides by zero */
+		if (n == 0 || x == 1) return 1;
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* base -1: fall through and let the loop work out the sign */
+	}
+	for (bits = n; ; ) {
+		if (bits & 01) result *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* z := sum_i conjg(x(i))*y(i), the conjugated complex dot product (as in BLAS CDOTC), delivered through z. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float cdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) cdotc += conjf(Cf(&x[i])) * Cf(&y[i]); /* cdotc = cdotc + conjg(x(i))*y(i) */
+	} else {
+		/* BLAS convention: with a negative increment the walk starts at the far end of the vector */
+		integer ix = incx < 0 ? (1 - n) * incx : 0;
+		integer iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy) cdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = cdotc;
+}
+/* z := sum_i dconjg(x(i))*y(i), the conjugated complex dot product (as in BLAS ZDOTC), delivered through z. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotc += conj(Cd(&x[i])) * Cd(&y[i]); /* zdotc = zdotc + dconjg(x(i))*y(i) */
+	} else {
+		/* BLAS convention: with a negative increment the walk starts at the far end of the vector */
+		integer ix = incx < 0 ? (1 - n) * incx : 0;
+		integer iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy) zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+/* z := sum_i x(i)*y(i), the unconjugated complex dot product (as in BLAS CDOTU), delivered through z. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float cdotu = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) cdotu += Cf(&x[i]) * Cf(&y[i]); /* cdotu = cdotu + x(i)*y(i) */
+	} else {
+		/* BLAS convention: with a negative increment the walk starts at the far end of the vector */
+		integer ix = incx < 0 ? (1 - n) * incx : 0;
+		integer iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy) cdotu += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = cdotu;
+}
+/* z := sum_i x(i)*y(i), the unconjugated complex dot product (as in BLAS ZDOTU), delivered through z. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotu = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) zdotu += Cd(&x[i]) * Cd(&y[i]); /* zdotu = zdotu + x(i)*y(i) */
+	} else {
+		/* BLAS convention: with a negative increment the walk starts at the far end of the vector */
+		integer ix = incx < 0 ? (1 - n) * incx : 0;
+		integer iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++, ix += incx, iy += incy) zdotu += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__6 = 6;
+static integer c_n1 = -1;
+static integer c__9 = 9;
+static integer c__0 = 0;
+static integer c__1 = 1;
+static doublereal c_b82 = 0.;
+
+/* > \brief <b> DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELSD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsd.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsd.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsd.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */
+/*                          WORK, LWORK, IWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */
+/*       DOUBLE PRECISION   RCOND */
+/*       INTEGER            IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELSD computes the minimum-norm solution to a real linear least */
+/* > squares problem: */
+/* >     minimize 2-norm(| b - A*x |) */
+/* > using the singular value decomposition (SVD) of A. A is an M-by-N */
+/* > matrix which may be rank-deficient. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
+/* > matrix X. */
+/* > */
+/* > The problem is solved in three steps: */
+/* > (1) Reduce the coefficient matrix A to bidiagonal form with */
+/* >     Householder transformations, reducing the original problem */
+/* >     into a "bidiagonal least squares problem" (BLS) */
+/* > (2) Solve the BLS using a divide and conquer approach. */
+/* > (3) Apply back all the Householder transformations to solve */
+/* >     the original least squares problem. */
+/* > */
+/* > The effective rank of A is determined by treating as zero those */
+/* > singular values which are less than RCOND times the largest singular */
+/* > value. */
+/* > */
+/* > The divide and conquer algorithm makes very mild assumptions about */
+/* > floating point arithmetic. It will work on machines with a guard */
+/* > digit in add/subtract, or on those binary machines without guard */
+/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
+/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
+/* > without guard digits, but we know of none. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of A. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X. NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, A has been destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the M-by-NRHS right hand side matrix B. */
+/* >          On exit, B is overwritten by the N-by-NRHS solution */
+/* >          matrix X.  If m >= n and RANK = n, the residual */
+/* >          sum-of-squares for the solution in the i-th column is given */
+/* >          by the sum of squares of elements n+1:m in that column. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= max(1,max(M,N)). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The singular values of A in decreasing order. */
+/* >          The condition number of A in the 2-norm = S(1)/S(min(m,n)). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >          RCOND is used to determine the effective rank of A. */
+/* >          Singular values S(i) <= RCOND*S(1) are treated as zero. */
+/* >          If RCOND < 0, machine precision is used instead. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RANK */
+/* > \verbatim */
+/* >          RANK is INTEGER */
+/* >          The effective rank of A, i.e., the number of singular values */
+/* >          which are greater than RCOND*S(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK must be at least 1. */
+/* >          The exact minimum amount of workspace needed depends on M, */
+/* >          N and NRHS. As long as LWORK is at least */
+/* >              12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */
+/* >          if M is greater than or equal to N or */
+/* >              12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */
+/* >          if M is less than N, the code will execute correctly. */
+/* >          SMLSIZ is returned by ILAENV and is equal to the maximum */
+/* >          size of the subproblems at the bottom of the computation */
+/* >          tree (usually about 25), and */
+/* >             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */
+/* >          LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN), */
+/* >          where MINMN = MIN( M,N ). */
+/* >          On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  the algorithm for computing the SVD failed to converge; */
+/* >                if INFO = i, i off-diagonal elements of an intermediate */
+/* >                bidiagonal form did not converge to zero. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup doubleGEsolve */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Ming Gu and Ren-Cang Li, Computer Science Division, University of */
+/* >       California at Berkeley, USA \n */
+/* >     Osni Marques, LBNL/NERSC, USA \n */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, 
+	doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
+	s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
+	 integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    doublereal anrm, bnrm;
+    integer itau, nlvl, iascl, ibscl;
+    doublereal sfmin;
+    integer minmn, maxmn, itaup, itauq, mnthr, nwork;
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
+    integer ie, il;
+    extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *);
+    integer mm;
+    extern doublereal dlange_(char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *);
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dlalsd_(char *, integer *, integer *, integer *, doublereal *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *), dlascl_(char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *, doublereal *, integer *, integer *), dgeqrf_(
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, integer *), dlacpy_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, 
+	    doublereal *, doublereal *, integer *), xerbla_(char *, 
+	    integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    doublereal bignum;
+    extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    integer wlalsd;
+    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+    integer ldwork;
+    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+    integer liwork, minwrk, maxwrk;
+    doublereal smlnum;
+    logical lquery;
+    integer smlsiz;
+    doublereal eps;
+/*     Least squares driver: checks arguments, sizes workspace, scales A and B, */
+/*     bidiagonalizes A (after QR/LQ on paths 1a/2a), calls DLALSD, then unscales. */
+/*  -- LAPACK driver routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --s;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    maxmn = f2cmax(*m,*n);
+    mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,maxmn)) {
+	*info = -7;
+    }
+
+    smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*     Compute workspace. */
+/*     (Note: Comments in the code beginning "Workspace:" describe the */
+/*     minimal amount of workspace needed at that point in the code, */
+/*     as well as the preferred amount for good performance. */
+/*     NB refers to the optimal block size for the immediately */
+/*     following subroutine, as returned by ILAENV.) */
+
+    minwrk = 1;
+    liwork = 1;
+    minmn = f2cmax(1,minmn);
+/* Computing MAX */
+    i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / 
+	    log(2.)) + 1;
+    nlvl = f2cmax(i__1,0);
+
+    if (*info == 0) {
+	maxwrk = 0;
+	liwork = minmn * 3 * nlvl + minmn * 11;
+	mm = *m;
+	if (*m >= *n && *m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, 
+		    n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", 
+		    m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
+	    maxwrk = f2cmax(i__1,i__2);
+	}
+	if (*m >= *n) {
+
+/*           Path 1 - overdetermined or exactly determined. */
+
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD"
+		    , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", 
+		    "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR",
+		     "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = f2cmax(i__1,i__2);
+/* Computing 2nd power */
+	    i__1 = smlsiz + 1;
+	    wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *
+		    nrhs + i__1 * i__1;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
+	    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = f2cmax(i__1,i__2), 
+		    i__2 = *n * 3 + wlalsd;
+	    minwrk = f2cmax(i__1,i__2);
+	}
+	if (*n > *m) {
+/* Computing 2nd power */
+	    i__1 = smlsiz + 1;
+	    wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *
+		    nrhs + i__1 * i__1;
+	    if (*n >= mnthr) {
+
+/*              Path 2a - underdetermined, with many more columns */
+/*              than rows. */
+
+		maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, 
+			&c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * 
+			ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(&
+			c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, (
+			ftnlen)3);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * 
+			ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)3);
+		maxwrk = f2cmax(i__1,i__2);
+		if (*nrhs > 1) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+		    maxwrk = f2cmax(i__1,i__2);
+		} else {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
+		    maxwrk = f2cmax(i__1,i__2);
+		}
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", 
+			"LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
+		maxwrk = f2cmax(i__1,i__2);
+/*     XXX: Ensure the Path 2a case below is triggered.  The workspace */
+/*     calculation should use queries for all routines eventually. */
+/* Computing MAX */
+/* Computing MAX */
+		i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 =
+			 f2cmax(i__3,*nrhs), i__4 = *n - *m * 3;
+		i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + f2cmax(i__3,i__4);
+		maxwrk = f2cmax(i__1,i__2);
+	    } else {
+
+/*              Path 2 - remaining underdetermined cases. */
+
+		maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m,
+			 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR"
+			, "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", 
+			"PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+/* Computing MAX */
+	    i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = f2cmax(i__1,i__2), 
+		    i__2 = *m * 3 + wlalsd;
+	    minwrk = f2cmax(i__1,i__2);
+	}
+	minwrk = f2cmin(minwrk,maxwrk);
+	work[1] = (doublereal) maxwrk;
+	iwork[1] = liwork;
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELSD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	goto L10;
+    }
+
+/*     Quick return if possible (WORK(1)/IWORK(1) were already set above). */
+
+    if (*m == 0 || *n == 0) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters. */
+
+    eps = dlamch_("P");
+    sfmin = dlamch_("S");
+    smlnum = sfmin / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Scale A if max entry outside range [SMLNUM,BIGNUM]. */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = f2cmax(*m,*n);
+	dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb);
+	dlaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1);
+	*rank = 0;
+	goto L10;
+    }
+
+/*     Scale B if max entry outside range [SMLNUM,BIGNUM]. */
+
+    bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     If M < N make sure certain entries of B are zero. */
+
+    if (*m < *n) {
+	i__1 = *n - *m;
+	dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb);
+    }
+
+/*     Overdetermined case. */
+
+    if (*m >= *n) {
+
+/*        Path 1 - overdetermined or exactly determined. */
+
+	mm = *m;
+	if (*m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+	    itau = 1;
+	    nwork = itau + *n;
+
+/*           Compute A=Q*R. */
+/*           (Workspace: need 2*N, prefer N+N*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+
+/*           Multiply B by transpose(Q). */
+/*           (Workspace: need N+NRHS, prefer N+NRHS*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+/*           Zero out below R. */
+
+	    if (*n > 1) {
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		dlaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], 
+			lda);
+	    }
+	}
+
+	ie = 1;
+	itauq = ie + *n;
+	itaup = itauq + *n;
+	nwork = itaup + *n;
+
+/*        Bidiagonalize R in A. */
+/*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */
+
+	i__1 = *lwork - nwork + 1;
+	dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		work[itaup], &work[nwork], &i__1, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors of R. */
+/*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */
+
+	i__1 = *lwork - nwork + 1;
+	dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], 
+		&b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+/*        A nonzero INFO skips the back-transformation and the unscaling. */
+	dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, 
+		rcond, rank, &work[nwork], &iwork[1], info);
+	if (*info != 0) {
+	    goto L10;
+	}
+
+/*        Multiply B by right bidiagonalizing vectors of R. */
+
+	i__1 = *lwork - nwork + 1;
+	dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
+		b[b_offset], ldb, &work[nwork], &i__1, info);
+
+    } else /* M < N: underdetermined case */ {
+/* Computing MAX */
+	i__1 = *m, i__2 = (*m << 1) - 4, i__1 = f2cmax(i__1,i__2), i__1 = f2cmax(
+		i__1,*nrhs), i__2 = *n - *m * 3, i__1 = f2cmax(i__1,i__2);
+	if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + f2cmax(i__1,wlalsd)) {
+
+/*        Path 2a - underdetermined, with many more columns than rows */
+/*        and sufficient workspace for an efficient algorithm. */
+
+	    ldwork = *m;
+/* Computing MAX */
+/* Computing MAX */
+	    i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = 
+		    f2cmax(i__3,*nrhs), i__4 = *n - *m * 3;
+	    i__1 = (*m << 2) + *m * *lda + f2cmax(i__3,i__4), i__2 = *m * *lda + 
+		    *m + *m * *nrhs, i__1 = f2cmax(i__1,i__2), i__2 = (*m << 2) 
+		    + *m * *lda + wlalsd;
+	    if (*lwork >= f2cmax(i__1,i__2)) {
+		ldwork = *lda;
+	    }
+	    itau = 1;
+	    nwork = *m + 1;
+
+/*        Compute A=L*Q. */
+/*        (Workspace: need 2*M, prefer M+M*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+	    il = nwork;
+
+/*        Copy L to WORK(IL), zeroing out above its diagonal. */
+
+	    dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
+	    i__1 = *m - 1;
+	    i__2 = *m - 1;
+	    dlaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &
+		    ldwork);
+	    ie = il + ldwork * *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*        Bidiagonalize L in WORK(IL). */
+/*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], 
+		    &work[itaup], &work[nwork], &i__1, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors of L. */
+/*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
+		    itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+/*        A nonzero INFO skips the back-transformation and the unscaling. */
+	    dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], 
+		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of L. */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
+		    itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Zero out below first M rows of B. */
+
+	    i__1 = *n - *m;
+	    dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], 
+		    ldb);
+	    nwork = itau + *m;
+
+/*        Multiply transpose(Q) by B. */
+/*        (Workspace: need M+NRHS, prefer M+NRHS*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+	} else {
+
+/*        Path 2 - remaining underdetermined cases. */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*        Bidiagonalize A. */
+/*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__1, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors. */
+/*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+/*        A nonzero INFO skips the back-transformation and the unscaling. */
+	    dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], 
+		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of A. */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+	}
+    }
+
+/*     Undo scaling. */
+
+    if (iascl == 1) {
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    } else if (iascl == 2) {
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    }
+    if (ibscl == 1) {
+	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+/*     Common exit: WORK(1) = MAXWRK and IWORK(1) = LIWORK. */
+L10:
+    work[1] = (doublereal) maxwrk;
+    iwork[1] = liwork;
+    return 0;
+
+/*     End of DGELSD */
+
+} /* dgelsd_ */
+
diff --git a/lapack-netlib/SRC/dgelss.c b/lapack-netlib/SRC/dgelss.c
new file mode 100644
index 000000000..7d25a55ec
--- /dev/null
+++ b/lapack-netlib/SRC/dgelss.c
@@ -0,0 +1,1318 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C99 float complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C99 double complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage, viewed through a _Complex float pointer */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same storage, viewed through a _Complex double pointer */
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Returns x**n (n may be negative) by binary exponentiation.  |n| is */
+	/* formed in unsigned arithmetic so n == INT_MIN is never negated as an int. */
+	float result = 1.0f;
+	unsigned long int bits = (unsigned long int) n;
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;	/* this bit of |n| is set */
+		if ((bits >>= 1) != 0) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* Returns x**n (n may be negative) by binary exponentiation.  |n| is */
+	/* formed in unsigned arithmetic so n == INT_MIN is never negated as an int. */
+	double result = 1.0;
+	unsigned long int bits = (unsigned long int) n;
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;	/* this bit of |n| is set */
+		if ((bits >>= 1) != 0) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Returns x**n (n may be negative) by binary exponentiation.  |n| is */
+	/* formed in unsigned arithmetic so n == INT_MIN is never negated as an int. */
+	_Complex float result = 1.0;
+	unsigned long int bits = (unsigned long int) n;
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;	/* this bit of |n| is set */
+		if ((bits >>= 1) != 0) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Returns x**n (n may be negative) by binary exponentiation.  |n| is */
+	/* formed in unsigned arithmetic so n == INT_MIN is never negated as an int. */
+	_Complex double result = 1.0;
+	unsigned long int bits = (unsigned long int) n;
+	if (n < 0) { bits = 0UL - bits; x = 1/x; }
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;	/* this bit of |n| is set */
+		if ((bits >>= 1) != 0) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n.  For n <= 0: 1 when n == 0 or x == 1; (-1)**|n| when */
+	/* x == -1; otherwise 0, except that x == 0 still evaluates 1/x exactly as */
+	/* the original expression did.  |n| is formed in unsigned arithmetic so */
+	/* that n == INT_MIN is never negated as a signed value. */
+	integer result = 1; unsigned long int bits = (unsigned long int) n;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		if (x != -1) return x == 0 ? 1/x : 0;
+		bits = 0UL - bits;
+	}
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;
+		if ((bits >>= 1) != 0) x *= x;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	/* 1-based position, counted from s, of the first largest entry of w(s..e); n is unused. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	/* 1-based position, counted from s, of the first largest entry of w(s..e); n is unused. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of conjg(x(i)) * y(i), accumulated in float complex. */
+	/* A negative increment walks its vector backwards (BLAS rule), so the */
+	/* start moves to element (1-n)*inc instead of reading before the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	if (n > 0) {
+		if (incx < 0) x += (1 - n) * incx;
+		if (incy < 0) y += (1 - n) * incy;
+	}
+	for (i = 0; i < n; i++)
+		acc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of conjg(x(i)) * y(i), accumulated in double complex. */
+	/* A negative increment walks its vector backwards (BLAS rule), so the */
+	/* start moves to element (1-n)*inc instead of reading before the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	if (n > 0) {
+		if (incx < 0) x += (1 - n) * incx;
+		if (incy < 0) y += (1 - n) * incy;
+	}
+	for (i = 0; i < n; i++)
+		acc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i) (no conjugation), in float complex. */
+	/* A negative increment walks its vector backwards (BLAS rule), so the */
+	/* start moves to element (1-n)*inc instead of reading before the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	if (n > 0) {
+		if (incx < 0) x += (1 - n) * incx;
+		if (incy < 0) y += (1 - n) * incy;
+	}
+	for (i = 0; i < n; i++)
+		acc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i) (no conjugation), in double complex. */
+	/* A negative increment walks its vector backwards (BLAS rule), so the */
+	/* start moves to element (1-n)*inc instead of reading before the array. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	if (n > 0) {
+		if (incx < 0) x += (1 - n) * incx;
+		if (incy < 0) y += (1 - n) * incy;
+	}
+	for (i = 0; i < n; i++)
+		acc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__6 = 6;	/* first argument of the ilaenv_ call that yields mnthr */
+static integer c_n1 = -1;	/* LWORK = -1 in workspace queries; also the last integer argument of ilaenv_ */
+static integer c__0 = 0;	/* 2nd/3rd arguments of dlascl_ and 4th argument of dbdsqr_ */
+static doublereal c_b46 = 0.;	/* zero: fill values for dlaset_, 'beta' slot of dgemm_/dgemv_ */
+static integer c__1 = 1;	/* unit increment, or a dimension / leading dimension of 1 */
+static doublereal c_b79 = 1.;	/* one: 'alpha' slot of dgemm_/dgemv_ */
+
+/* > \brief <b> DGELSS solves overdetermined or underdetermined systems for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELSS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelss.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelss.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelss.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */
+/*                          WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */
+/*       DOUBLE PRECISION   RCOND */
+/*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELSS computes the minimum norm solution to a real linear least */
+/* > squares problem: */
+/* > */
+/* > Minimize 2-norm(| b - A*x |). */
+/* > */
+/* > using the singular value decomposition (SVD) of A. A is an M-by-N */
+/* > matrix which may be rank-deficient. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */
+/* > X. */
+/* > */
+/* > The effective rank of A is determined by treating as zero those */
+/* > singular values which are less than RCOND times the largest singular */
+/* > value. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X. NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the first min(m,n) rows of A are overwritten with */
+/* >          its right singular vectors, stored rowwise. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the M-by-NRHS right hand side matrix B. */
+/* >          On exit, B is overwritten by the N-by-NRHS solution */
+/* >          matrix X.  If m >= n and RANK = n, the residual */
+/* >          sum-of-squares for the solution in the i-th column is given */
+/* >          by the sum of squares of elements n+1:m in that column. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= max(1,max(M,N)). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The singular values of A in decreasing order. */
+/* >          The condition number of A in the 2-norm = S(1)/S(min(m,n)). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >          RCOND is used to determine the effective rank of A. */
+/* >          Singular values S(i) <= RCOND*S(1) are treated as zero. */
+/* >          If RCOND < 0, machine precision is used instead. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RANK */
+/* > \verbatim */
+/* >          RANK is INTEGER */
+/* >          The effective rank of A, i.e., the number of singular values */
+/* >          which are greater than RCOND*S(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= 1, and also: */
+/* >          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  the algorithm for computing the SVD failed to converge; */
+/* >                if INFO = i, i off-diagonal elements of an intermediate */
+/* >                bidiagonal form did not converge to zero. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgelss_(integer *m, integer *n, integer *nrhs, 
+	doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
+	s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1;
+
+    /* Local variables */
+    doublereal anrm, bnrm;
+    integer itau, lwork_dgebrd__, lwork_dgelqf__, lwork_dgeqrf__, 
+	    lwork_dorgbr__, lwork_dormbr__, i__, lwork_dormlq__, 
+	    lwork_dormqr__;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer iascl, ibscl;
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *), drscl_(integer *, 
+	    doublereal *, doublereal *, integer *);
+    integer chunk;
+    doublereal sfmin;
+    integer minmn;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer maxmn, itaup, itauq, mnthr, iwork;
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
+    integer bl, ie, il;
+    extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *);
+    integer mm;
+    extern doublereal dlange_(char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *);
+    integer bdspac;
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    integer *, integer *, doublereal *, integer *, integer *),
+	     dgeqrf_(integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *), dlacpy_(char *,
+	     integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), dlaset_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen), dbdsqr_(char *, integer *, 
+	    integer *, integer *, integer *, doublereal *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *), dorgbr_(char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *);
+    doublereal bignum;
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dormlq_(char *, char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    integer ldwork;
+    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+    integer minwrk, maxwrk;
+    doublereal smlnum;
+    logical lquery;
+    doublereal dum[1], eps, thr;
+/*  dgelss_: minimum-norm least squares solution of A*X = B through the SVD of A. */
+/*  Every return statement yields 0; the outcome is reported through *info. */
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: shift the base pointers so the 1-based Fortran subscripts used below (a[i + j*a_dim1], s[1], work[1]) address the caller's arrays */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --s;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    maxmn = f2cmax(*m,*n);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldb < f2cmax(1,maxmn)) {
+	*info = -7;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	if (minmn > 0) {
+	    mm = *m;
+	    mnthr = ilaenv_(&c__6, "DGELSS", " ", m, n, nrhs, &c_n1, (ftnlen)
+		    6, (ftnlen)1);
+	    if (*m >= *n && *m >= mnthr) {
+
+/*              Path 1a - overdetermined, with many more rows than */
+/*                        columns */
+
+/*              Compute space needed for DGEQRF */
+		dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
+		lwork_dgeqrf__ = (integer) dum[0];
+/*              Compute space needed for DORMQR */
+		dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, dum, &b[
+			b_offset], ldb, dum, &c_n1, info);
+		lwork_dormqr__ = (integer) dum[0];
+		mm = *n;
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_dgeqrf__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n + lwork_dormqr__;
+		maxwrk = f2cmax(i__1,i__2);
+	    }
+	    if (*m >= *n) {
+
+/*              Path 1 - overdetermined or exactly determined */
+
+/*              Compute workspace needed for DBDSQR */
+
+/* Computing MAX */
+		i__1 = 1, i__2 = *n * 5;
+		bdspac = f2cmax(i__1,i__2);
+/*              Compute space needed for DGEBRD */
+		dgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, 
+			&c_n1, info);
+		lwork_dgebrd__ = (integer) dum[0];
+/*              Compute space needed for DORMBR */
+		dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, dum, &
+			b[b_offset], ldb, dum, &c_n1, info);
+		lwork_dormbr__ = (integer) dum[0];
+/*              Compute space needed for DORGBR */
+		dorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, 
+			info);
+		lwork_dorgbr__ = (integer) dum[0];
+/*              Compute total workspace needed */
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3 + lwork_dgebrd__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3 + lwork_dormbr__;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * 3 + lwork_dorgbr__;
+		maxwrk = f2cmax(i__1,i__2);
+		maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *n * *nrhs;
+		maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = f2cmax(i__1,
+			i__2);
+		minwrk = f2cmax(i__1,bdspac);
+		maxwrk = f2cmax(minwrk,maxwrk);
+	    }
+	    if (*n > *m) {
+
+/*              Compute workspace needed for DBDSQR */
+
+/* Computing MAX */
+		i__1 = 1, i__2 = *m * 5;
+		bdspac = f2cmax(i__1,i__2);
+/* Computing MAX */
+		i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = f2cmax(i__1,
+			i__2);
+		minwrk = f2cmax(i__1,bdspac);
+		if (*n >= mnthr) {
+
+/*                 Path 2a - underdetermined, with many more columns */
+/*                 than rows */
+
+/*                 Compute space needed for DGELQF */
+		    dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info);
+		    lwork_dgelqf__ = (integer) dum[0];
+/*                 Compute space needed for DGEBRD */
+		    dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, 
+			    dum, &c_n1, info);
+		    lwork_dgebrd__ = (integer) dum[0];
+/*                 Compute space needed for DORMBR. NOTE(review): queried with n as 6th argument; the Path 2a call below passes m */
+		    dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, dum,
+			     &b[b_offset], ldb, dum, &c_n1, info);
+		    lwork_dormbr__ = (integer) dum[0];
+/*                 Compute space needed for DORGBR */
+		    dorgbr_("P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, 
+			    info);
+		    lwork_dorgbr__ = (integer) dum[0];
+/*                 Compute space needed for DORMLQ */
+		    dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, dum, &b[
+			    b_offset], ldb, dum, &c_n1, info);
+		    lwork_dormlq__ = (integer) dum[0];
+/*                 Compute total workspace needed */
+		    maxwrk = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + 
+			    lwork_dgebrd__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + 
+			    lwork_dormbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + 
+			    lwork_dorgbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    if (*nrhs > 1) {
+/* Computing MAX */
+			i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+			maxwrk = f2cmax(i__1,i__2);
+		    } else {
+/* Computing MAX */
+			i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
+			maxwrk = f2cmax(i__1,i__2);
+		    }
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m + lwork_dormlq__;
+		    maxwrk = f2cmax(i__1,i__2);
+		} else {
+
+/*                 Path 2 - underdetermined */
+
+/*                 Compute space needed for DGEBRD */
+		    dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, 
+			    dum, &c_n1, info);
+		    lwork_dgebrd__ = (integer) dum[0];
+/*                 Compute space needed for DORMBR. NOTE(review): queried with m as 6th argument; the Path 2 call below passes n */
+		    dormbr_("Q", "L", "T", m, nrhs, m, &a[a_offset], lda, dum,
+			     &b[b_offset], ldb, dum, &c_n1, info);
+		    lwork_dormbr__ = (integer) dum[0];
+/*                 Compute space needed for DORGBR */
+		    dorgbr_("P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, 
+			    info);
+		    lwork_dorgbr__ = (integer) dum[0];
+		    maxwrk = *m * 3 + lwork_dgebrd__;
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * 3 + lwork_dormbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * 3 + lwork_dorgbr__;
+		    maxwrk = f2cmax(i__1,i__2);
+		    maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *n * *nrhs;
+		    maxwrk = f2cmax(i__1,i__2);
+		}
+	    }
+	    maxwrk = f2cmax(minwrk,maxwrk);
+	}
+	work[1] = (doublereal) maxwrk; /* report the preferred workspace size to the caller */
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELSS", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible (mnthr is assigned only when minmn > 0, so it is defined past this point) */
+
+    if (*m == 0 || *n == 0) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    eps = dlamch_("P");
+    sfmin = dlamch_("S");
+    smlnum = sfmin / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = f2cmax(*m,*n);
+	dlaset_("F", &i__1, nrhs, &c_b46, &c_b46, &b[b_offset], ldb);
+	dlaset_("F", &minmn, &c__1, &c_b46, &c_b46, &s[1], &minmn);
+	*rank = 0;
+	goto L70;
+    }
+
+/*     Scale B if max element outside range [SMLNUM,BIGNUM] */
+
+    bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     Overdetermined case */
+
+    if (*m >= *n) {
+
+/*        Path 1 - overdetermined or exactly determined */
+
+	mm = *m;
+	if (*m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns */
+
+	    mm = *n;
+	    itau = 1;
+	    iwork = itau + *n;
+
+/*           Compute A=Q*R */
+/*           (Workspace: need 2*N, prefer N+N*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1,
+		     info);
+
+/*           Multiply B by transpose(Q) */
+/*           (Workspace: need N+NRHS, prefer N+NRHS*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[iwork], &i__1, info);
+
+/*           Zero out below R */
+
+	    if (*n > 1) {
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		dlaset_("L", &i__1, &i__2, &c_b46, &c_b46, &a[a_dim1 + 2], 
+			lda);
+	    }
+	}
+
+	ie = 1;
+	itauq = ie + *n;
+	itaup = itauq + *n;
+	iwork = itaup + *n;
+
+/*        Bidiagonalize R in A */
+/*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */
+
+	i__1 = *lwork - iwork + 1;
+	dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		work[itaup], &work[iwork], &i__1, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors of R */
+/*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */
+
+	i__1 = *lwork - iwork + 1;
+	dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], 
+		&b[b_offset], ldb, &work[iwork], &i__1, info);
+
+/*        Generate right bidiagonalizing vectors of R in A */
+/*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
+
+	i__1 = *lwork - iwork + 1;
+	dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &
+		i__1, info);
+	iwork = ie + *n;
+
+/*        Perform bidiagonal QR iteration */
+/*          multiply B by transpose of left singular vectors */
+/*          compute right singular vectors in A */
+/*        (Workspace: need BDSPAC) */
+
+	dbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, 
+		dum, &c__1, &b[b_offset], ldb, &work[iwork], info);
+	if (*info != 0) {
+	    goto L70;
+	}
+
+/*        Multiply B by reciprocals of singular values */
+
+/* thr = max(rcond*s[1], sfmin); when rcond < 0, eps takes the place of rcond */
+	d__1 = *rcond * s[1];
+	thr = f2cmax(d__1,sfmin);
+	if (*rcond < 0.) {
+/* Computing MAX */
+	    d__1 = eps * s[1];
+	    thr = f2cmax(d__1,sfmin);
+	}
+	*rank = 0;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (s[i__] > thr) {
+		drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+		++(*rank);
+	    } else {
+		dlaset_("F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], 
+			ldb);
+	    }
+/* L10: */
+	}
+
+/*        Multiply B by right singular vectors */
+/*        (Workspace: need N, prefer N*NRHS) */
+
+	if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
+	    dgemm_("T", "N", n, nrhs, n, &c_b79, &a[a_offset], lda, &b[
+		    b_offset], ldb, &c_b46, &work[1], ldb);
+	    dlacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb)
+		    ;
+	} else if (*nrhs > 1) {
+	    chunk = *lwork / *n;
+	    i__1 = *nrhs;
+	    i__2 = chunk;
+	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* bl = number of right-hand-side columns handled in this chunk */
+		i__3 = *nrhs - i__ + 1;
+		bl = f2cmin(i__3,chunk);
+		dgemm_("T", "N", n, &bl, n, &c_b79, &a[a_offset], lda, &b[i__ 
+			* b_dim1 + 1], ldb, &c_b46, &work[1], n);
+		dlacpy_("G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb);
+/* L20: */
+	    }
+	} else {
+	    dgemv_("T", n, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, 
+		    &c_b46, &work[1], &c__1);
+	    dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
+	}
+
+    } else /* underdetermined case: M < N */ {
+/* Computing MAX */
+	i__2 = *m, i__1 = (*m << 1) - 4, i__2 = f2cmax(i__2,i__1), i__2 = f2cmax(
+		i__2,*nrhs), i__1 = *n - *m * 3;
+	if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + f2cmax(i__2,i__1)) {
+
+/*        Path 2a - underdetermined, with many more columns than rows */
+/*        and sufficient workspace for an efficient algorithm */
+
+	    ldwork = *m;
+/* Computing MAX */
+/* Computing MAX */
+	    i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = 
+		    f2cmax(i__3,*nrhs), i__4 = *n - *m * 3;
+	    i__2 = (*m << 2) + *m * *lda + f2cmax(i__3,i__4), i__1 = *m * *lda + 
+		    *m + *m * *nrhs;
+	    if (*lwork >= f2cmax(i__2,i__1)) {
+		ldwork = *lda;
+	    }
+	    itau = 1;
+	    iwork = *m + 1;
+
+/*        Compute A=L*Q */
+/*        (Workspace: need 2*M, prefer M+M*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2,
+		     info);
+	    il = iwork;
+
+/*        Copy L to WORK(IL), zeroing out above it */
+
+	    dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
+	    i__2 = *m - 1;
+	    i__1 = *m - 1;
+	    dlaset_("U", &i__2, &i__1, &c_b46, &c_b46, &work[il + ldwork], &
+		    ldwork);
+	    ie = il + ldwork * *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    iwork = itaup + *m;
+
+/*        Bidiagonalize L in WORK(IL) */
+/*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], 
+		    &work[itaup], &work[iwork], &i__2, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors of L */
+/*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
+		    itauq], &b[b_offset], ldb, &work[iwork], &i__2, info);
+
+/*        Generate right bidiagonalizing vectors of L in WORK(IL) */
+/*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    dorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[
+		    iwork], &i__2, info);
+	    iwork = ie + *m;
+
+/*        Perform bidiagonal QR iteration, */
+/*           computing right singular vectors of L in WORK(IL) and */
+/*           multiplying B by transpose of left singular vectors */
+/*        (Workspace: need M*M+M+BDSPAC) */
+
+	    dbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &
+		    ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork]
+		    , info);
+	    if (*info != 0) {
+		goto L70;
+	    }
+
+/*        Multiply B by reciprocals of singular values */
+
+/* thr = max(rcond*s[1], sfmin); when rcond < 0, eps takes the place of rcond */
+	    d__1 = *rcond * s[1];
+	    thr = f2cmax(d__1,sfmin);
+	    if (*rcond < 0.) {
+/* Computing MAX */
+		d__1 = eps * s[1];
+		thr = f2cmax(d__1,sfmin);
+	    }
+	    *rank = 0;
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		if (s[i__] > thr) {
+		    drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+		    ++(*rank);
+		} else {
+		    dlaset_("F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1]
+			    , ldb);
+		}
+/* L30: */
+	    }
+	    iwork = ie;
+
+/*        Multiply B by right singular vectors of L in WORK(IL) */
+/*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */
+
+	    if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
+		dgemm_("T", "N", m, nrhs, m, &c_b79, &work[il], &ldwork, &b[
+			b_offset], ldb, &c_b46, &work[iwork], ldb);
+		dlacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb);
+	    } else if (*nrhs > 1) {
+		chunk = (*lwork - iwork + 1) / *m;
+		i__2 = *nrhs;
+		i__1 = chunk;
+		for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += 
+			i__1) {
+/* bl = number of right-hand-side columns handled in this chunk */
+		    i__3 = *nrhs - i__ + 1;
+		    bl = f2cmin(i__3,chunk);
+		    dgemm_("T", "N", m, &bl, m, &c_b79, &work[il], &ldwork, &
+			    b[i__ * b_dim1 + 1], ldb, &c_b46, &work[iwork], m);
+		    dlacpy_("G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1]
+			    , ldb);
+/* L40: */
+		}
+	    } else {
+		dgemv_("T", m, m, &c_b79, &work[il], &ldwork, &b[b_dim1 + 1], 
+			&c__1, &c_b46, &work[iwork], &c__1);
+		dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1);
+	    }
+
+/*        Zero out below first M rows of B */
+
+	    i__1 = *n - *m;
+	    dlaset_("F", &i__1, nrhs, &c_b46, &c_b46, &b[*m + 1 + b_dim1], 
+		    ldb);
+	    iwork = itau + *m;
+
+/*        Multiply transpose(Q) by B */
+/*        (Workspace: need M+NRHS, prefer M+NRHS*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[iwork], &i__1, info);
+
+	} else {
+
+/*        Path 2 - remaining underdetermined cases */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    iwork = itaup + *m;
+
+/*        Bidiagonalize A */
+/*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[iwork], &i__1, info);
+
+/*        Multiply B by transpose of left bidiagonalizing vectors */
+/*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
+		    , &b[b_offset], ldb, &work[iwork], &i__1, info);
+
+/*        Generate right bidiagonalizing vectors in A */
+/*        (Workspace: need 4*M, prefer 3*M+M*NB) */
+
+	    i__1 = *lwork - iwork + 1;
+	    dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
+		    iwork], &i__1, info);
+	    iwork = ie + *m;
+
+/*        Perform bidiagonal QR iteration, */
+/*           computing right singular vectors of A in A and */
+/*           multiplying B by transpose of left singular vectors */
+/*        (Workspace: need BDSPAC) */
+
+	    dbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], 
+		    lda, dum, &c__1, &b[b_offset], ldb, &work[iwork], info);
+	    if (*info != 0) {
+		goto L70;
+	    }
+
+/*        Multiply B by reciprocals of singular values */
+
+/* thr = max(rcond*s[1], sfmin); when rcond < 0, eps takes the place of rcond */
+	    d__1 = *rcond * s[1];
+	    thr = f2cmax(d__1,sfmin);
+	    if (*rcond < 0.) {
+/* Computing MAX */
+		d__1 = eps * s[1];
+		thr = f2cmax(d__1,sfmin);
+	    }
+	    *rank = 0;
+	    i__1 = *m;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		if (s[i__] > thr) {
+		    drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb);
+		    ++(*rank);
+		} else {
+		    dlaset_("F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1]
+			    , ldb);
+		}
+/* L50: */
+	    }
+
+/*        Multiply B by right singular vectors of A */
+/*        (Workspace: need N, prefer N*NRHS) */
+
+	    if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
+		dgemm_("T", "N", n, nrhs, m, &c_b79, &a[a_offset], lda, &b[
+			b_offset], ldb, &c_b46, &work[1], ldb);
+		dlacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb);
+	    } else if (*nrhs > 1) {
+		chunk = *lwork / *n;
+		i__1 = *nrhs;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
+			i__2) {
+/* bl = number of right-hand-side columns handled in this chunk */
+		    i__3 = *nrhs - i__ + 1;
+		    bl = f2cmin(i__3,chunk);
+		    dgemm_("T", "N", n, &bl, m, &c_b79, &a[a_offset], lda, &b[
+			    i__ * b_dim1 + 1], ldb, &c_b46, &work[1], n);
+		    dlacpy_("F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], 
+			    ldb);
+/* L60: */
+		}
+	    } else {
+		dgemv_("T", m, n, &c_b79, &a[a_offset], lda, &b[b_offset], &
+			c__1, &c_b46, &work[1], &c__1);
+		dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
+	    }
+	}
+    }
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    } else if (iascl == 2) {
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    }
+    if (ibscl == 1) {
+	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+
+L70:
+    work[1] = (doublereal) maxwrk; /* reached by the normal path, the all-zero A path and dbdsqr_ failures alike */
+    return 0;
+
+/*     End of DGELSS */
+
+} /* dgelss_ */
+
diff --git a/lapack-netlib/SRC/dgelsy.c b/lapack-netlib/SRC/dgelsy.c
new file mode 100644
index 000000000..0a48ab1d5
--- /dev/null
+++ b/lapack-netlib/SRC/dgelsy.c
@@ -0,0 +1,945 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* Fortran INTEGER as used by every prototype in this file */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;	/* real part r, imaginary part i */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed as a C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed as a C99 complex */
+#define pCf(z) (*_pCf(z))	/* lvalue form, used by the macros below to store a result through z */
+#define pCd(z) (*_pCd(z))	/* lvalue form, used by the macros below to store a result through z */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+/* Descriptor records from the stock f2c header; the routine defined later in this file references none of them. */
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/* parameters in standard's order */
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): the only *len member typed ftnint rather than ftnlen; both are int here */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+/* One member per scalar type declared above, all sharing the same storage. */
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+/* Vardesc and Namelist are not referenced by the routine defined later in this file. */
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;	/* array of nvars descriptors, presumably; confirm against a user of this type */
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* type-generic, evaluates x twice; replaces function-like uses of abs() */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): ccos, while the sibling c_* macros call the ...f variants */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* NOTE(review): conj (double) applied to a float complex value */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* NOTE(review): cimag (double) applied to a float complex value */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}	/* NOTE(review): B is dereferenced directly, not wrapped in Cd() like A */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";	/* not referenced anywhere else in this file */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}	/* NOTE(review): expands to a brace-wrapped expression, only valid as an initializer */
+#define myhuge(w) {HUGE_VAL}	/* NOTE(review): same brace-wrapped form; w is unused */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* NOTE(review): same brace-wrapped form; always the double-precision helper */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();	/* unprototyped: accepts any argument list in C */
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Returns x**n by square-and-multiply; a negative exponent is applied to 1/x. */
+	float result = 1.0f;
+	/* |n| is formed in unsigned arithmetic: the former "n = -n" overflowed (UB) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 1UL) result *= x;
+		if(u >> 1) x *= x;	/* square only while exponent bits remain, as before */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* Returns x**n by square-and-multiply; a negative exponent is applied to 1/x. */
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: the former "n = -n" overflowed (UB) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 1UL) result *= x;
+		if(u >> 1) x *= x;	/* square only while exponent bits remain, as before */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Returns x**n by square-and-multiply; a negative exponent is applied to 1/x. */
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: the former "n = -n" overflowed (UB) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 1UL) result *= x;
+		if(u >> 1) x *= x;	/* square only while exponent bits remain, as before */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Returns x**n by square-and-multiply; a negative exponent is applied to 1/x. */
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: the former "n = -n" overflowed (UB) for the most negative integer. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;
+	for( ; u != 0; u >>= 1) {
+		if(u & 1UL) result *= x;
+		if(u >> 1) x *= x;	/* square only while exponent bits remain, as before */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; non-positive exponents are handled case by case before the multiply loop. */
+	integer result = 1; unsigned long int u;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* |x| > 1 gives 0; x == 0 still evaluates 1/x, exactly as the original expression did. */
+		return x == 0 ? 1/x : 0;
+	}
+	/* Left: n > 0, or x == -1 with n < 0. Unsigned negation replaces "n = -n", which overflowed for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	for( ; ; ) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is not used. */
+	integer best = s, k; double top = w[s-1];
+	for(k = s+1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is not used. */
+	integer best = s, k; float top = w[s-1];
+	for(k = s+1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z := sum over k of conjg(x_k) * y_k for n elements taken with strides incx and incy; the result is stored through z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a vector with a negative increment starting from its far end; */
+	/* indexing x[i*incx] from zero, as before, would read in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	/* With n <= 0 the loop does not run and zero is stored. */
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z := sum over k of conjg(x_k) * y_k for n elements taken with strides incx and incy; the result is stored through z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a vector with a negative increment starting from its far end; */
+	/* indexing x[i*incx] from zero, as before, would read in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	/* With n <= 0 the loop does not run and zero is stored. */
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* z := sum over k of x_k * y_k (no conjugation) for n elements taken with strides incx and incy; stored through z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a vector with a negative increment starting from its far end; */
+	/* indexing x[i*incx] from zero, as before, would read in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	/* With n <= 0 the loop does not run and zero is stored. */
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* z := sum over k of x_k * y_k (no conjugation) for n elements taken with strides incx and incy; stored through z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS walks a vector with a negative increment starting from its far end; */
+	/* indexing x[i*incx] from zero, as before, would read in front of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	/* With n <= 0 the loop does not run and zero is stored. */
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;	/* passed by address to ilaenv_, dlaic1_ and dcopy_ */
+static integer c_n1 = -1;	/* passed by address to ilaenv_ */
+static integer c__0 = 0;	/* passed by address to dlascl_ (both integer arguments after the type letter) */
+static doublereal c_b31 = 0.;	/* both scalar arguments of the dlaset_ calls that zero B */
+static integer c__2 = 2;	/* first argument of the dlaic1_ call that updates smin */
+static doublereal c_b54 = 1.;	/* scalar argument of dtrsm_ */
+
+/* > \brief <b> DGELSY solves overdetermined or underdetermined systems for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGELSY + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgelsy.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgelsy.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgelsy.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, */
+/*                          WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */
+/*       DOUBLE PRECISION   RCOND */
+/*       INTEGER            JPVT( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGELSY computes the minimum-norm solution to a real linear least */
+/* > squares problem: */
+/* >     minimize || A * X - B || */
+/* > using a complete orthogonal factorization of A.  A is an M-by-N */
+/* > matrix which may be rank-deficient. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
+/* > matrix X. */
+/* > */
+/* > The routine first computes a QR factorization with column pivoting: */
+/* >     A * P = Q * [ R11 R12 ] */
+/* >                 [  0  R22 ] */
+/* > with R11 defined as the largest leading submatrix whose estimated */
+/* > condition number is less than 1/RCOND.  The order of R11, RANK, */
+/* > is the effective rank of A. */
+/* > */
+/* > Then, R22 is considered to be negligible, and R12 is annihilated */
+/* > by orthogonal transformations from the right, arriving at the */
+/* > complete orthogonal factorization: */
+/* >    A * P = Q * [ T11 0 ] * Z */
+/* >                [  0  0 ] */
+/* > The minimum-norm solution is then */
+/* >    X = P * Z**T [ inv(T11)*Q1**T*B ] */
+/* >                 [        0         ] */
+/* > where Q1 consists of the first RANK columns of Q. */
+/* > */
+/* > This routine is basically identical to the original xGELSX except */
+/* > three differences: */
+/* >   o The call to the subroutine xGEQPF has been replaced by a */
+/* >     call to the subroutine xGEQP3. This subroutine is a Blas-3 */
+/* >     version of the QR factorization with column pivoting. */
+/* >   o Matrix B (the right hand side) is updated with Blas-3. */
+/* >   o The permutation of matrix B (the right hand side) is faster and */
+/* >     simpler. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of */
+/* >          columns of matrices B and X. NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, A has been overwritten by details of its */
+/* >          complete orthogonal factorization. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the M-by-NRHS right hand side matrix B. */
+/* >          On exit, the N-by-NRHS solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= max(1,M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] JPVT */
+/* > \verbatim */
+/* >          JPVT is INTEGER array, dimension (N) */
+/* >          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */
+/* >          to the front of AP, otherwise column i is a free column. */
+/* >          On exit, if JPVT(i) = k, then the i-th column of AP */
+/* >          was the k-th column of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >          RCOND is used to determine the effective rank of A, which */
+/* >          is defined as the order of the largest leading triangular */
+/* >          submatrix R11 in the QR factorization with pivoting of A, */
+/* >          whose estimated condition number < 1/RCOND. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RANK */
+/* > \verbatim */
+/* >          RANK is INTEGER */
+/* >          The effective rank of A, i.e., the order of the submatrix */
+/* >          R11.  This is the same as the order of the submatrix T11 */
+/* >          in the complete orthogonal factorization of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          The unblocked strategy requires that: */
+/* >             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */
+/* >          where MN = min( M, N ). */
+/* >          The block algorithm requires that: */
+/* >             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */
+/* >          where NB is an upper bound on the blocksize returned */
+/* >          by ILAENV for the names DGEQRF, DGERQF, DORMQR, */
+/* >          and DORMRQ, which are the ones this routine queries. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: If INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEsolve */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n */
+/* >    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
+/* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgelsy_(integer *m, integer *n, integer *nrhs, 
+	doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *
+	jpvt, doublereal *rcond, integer *rank, doublereal *work, integer *
+	lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal anrm, bnrm, smin, smax;
+    integer i__, j, iascl, ibscl;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer ismin, ismax;
+    doublereal c1, c2;
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *), dlaic1_(
+	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *, doublereal *);
+    doublereal wsize, s1, s2;
+    extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dlabad_(doublereal *, doublereal *);
+    integer nb;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    integer mn;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *), dlaset_(char *, integer *, integer 
+	    *, doublereal *, doublereal *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    doublereal bignum;
+    integer lwkmin, nb1, nb2, nb3, nb4;
+    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+    doublereal sminpr, smaxpr, smlnum;
+    extern /* Subroutine */ int dormrz_(char *, char *, integer *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    integer lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int dtzrzf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *);
+
+/*  Outline: validate arguments, scale A and B if needed, pivoted QR (dgeqp3_), */
+/*  rank decision (dlaic1_), RZ step (dtzrzf_), solve, un-permute, undo scaling. */
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+/*  Every return statement yields 0; status goes out through *info, the rank through *rank. */
+
+    /* Parameter adjustments: rebase the pointers so the arrays are indexed from 1 below */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --jpvt;
+    --work;
+
+    /* Function Body */
+    mn = f2cmin(*m,*n);
+    ismin = mn + 1;	/* work[ismin..]: vector handed to dlaic1_ together with smin */
+    ismax = (mn << 1) + 1;	/* work[ismax..]: vector handed to dlaic1_ together with smax */
+
+/*     Test the input arguments. */
+
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m);
+	if (*ldb < f2cmax(i__1,*n)) {
+	    *info = -7;
+	}
+    }
+
+/*     Figure out optimal block size */
+
+    if (*info == 0) {
+	if (mn == 0 || *nrhs == 0) {
+	    lwkmin = 1;
+	    lwkopt = 1;
+	} else {
+	    nb1 = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb2 = ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6,
+		     (ftnlen)1);
+	    nb3 = ilaenv_(&c__1, "DORMQR", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    nb4 = ilaenv_(&c__1, "DORMRQ", " ", m, n, nrhs, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+/* Computing MAX */
+	    i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3);
+	    nb = f2cmax(i__1,nb4);
+/* Computing MAX */
+	    i__1 = mn << 1, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = mn + 
+		    *nrhs;
+	    lwkmin = mn + f2cmax(i__1,i__2);
+/* Computing MAX */
+	    i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = f2cmax(
+		    i__1,i__2), i__2 = (mn << 1) + nb * *nrhs;
+	    lwkopt = f2cmax(i__1,i__2);
+	}
+	work[1] = (doublereal) lwkopt;
+/*        A workspace query (lwork == -1) is exempt from the size check below */
+	if (*lwork < lwkmin && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELSY", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (mn == 0 || *nrhs == 0) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = dlamch_("S") / dlamch_("P");
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Scale A, B if max entries outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = f2cmax(*m,*n);
+	dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
+	*rank = 0;
+	goto L70;
+    }
+
+    bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     Compute QR factorization with column pivoting of A: */
+/*        A * P = Q * R */
+
+    i__1 = *lwork - mn;
+    dgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1,
+	     info);
+    wsize = mn + work[mn + 1];
+/*     NOTE: wsize only feeds its own update further down; work[1] is set from lwkopt at L70. */
+/*     workspace: MN+2*N+NB*(N+1). */
+/*     Details of Householder rotations stored in WORK(1:MN). */
+
+/*     Determine RANK using incremental condition estimation */
+/*     (abs below is the type-generic macro defined in the header part of this file) */
+    work[ismin] = 1.;
+    work[ismax] = 1.;
+    smax = (d__1 = a[a_dim1 + 1], abs(d__1));
+    smin = smax;
+    if ((d__1 = a[a_dim1 + 1], abs(d__1)) == 0.) {
+	*rank = 0;
+	i__1 = f2cmax(*m,*n);
+	dlaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb);
+	goto L70;
+    } else {
+	*rank = 1;
+    }
+/*     Each pass tries to extend RANK by one column and stops at the first column failing the RCOND test */
+L10:
+    if (*rank < mn) {
+	i__ = *rank + 1;
+	dlaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[
+		i__ + i__ * a_dim1], &sminpr, &s1, &c1);
+	dlaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[
+		i__ + i__ * a_dim1], &smaxpr, &s2, &c2);
+
+	if (smaxpr * *rcond <= sminpr) {
+	    i__1 = *rank;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1];
+		work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1];
+/* L20: */
+	    }
+	    work[ismin + *rank] = c1;
+	    work[ismax + *rank] = c2;
+	    smin = sminpr;
+	    smax = smaxpr;
+	    ++(*rank);
+	    goto L10;
+	}
+    }
+
+/*     workspace: 3*MN. */
+
+/*     Logically partition R = [ R11 R12 ] */
+/*                             [  0  R22 ] */
+/*     where R11 = R(1:RANK,1:RANK) */
+
+/*     [R11,R12] = [ T11, 0 ] * Y */
+
+    if (*rank < *n) {
+	i__1 = *lwork - (mn << 1);
+	dtzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + 
+		1], &i__1, info);
+    }
+
+/*     workspace: 2*MN. */
+/*     Details of Householder rotations stored in WORK(MN+1:2*MN) */
+
+/*     B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
+
+    i__1 = *lwork - (mn << 1);
+    dormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], &
+	    b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info);
+/* Computing MAX */
+    d__1 = wsize, d__2 = (mn << 1) + work[(mn << 1) + 1];
+    wsize = f2cmax(d__1,d__2);
+
+/*     workspace: 2*MN+NB*NRHS. */
+
+/*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */
+
+    dtrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, &
+	    a[a_offset], lda, &b[b_offset], ldb);
+/*     Zero rows RANK+1..N of every column of B before Y**T is applied */
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = *rank + 1; i__ <= i__2; ++i__) {
+	    b[i__ + j * b_dim1] = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+
+/*     B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) */
+
+    if (*rank < *n) {
+	i__1 = *n - *rank;
+	i__2 = *lwork - (mn << 1);
+	dormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, 
+		&work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2,
+		 info);
+    }
+
+/*     workspace: 2*MN+NRHS. */
+
+/*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */
+/*     work[1..n] is a one-column scratch buffer: entry jpvt[i] receives row i, then the column is copied back */
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[jpvt[i__]] = b[i__ + j * b_dim1];
+/* L50: */
+	}
+	dcopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1);
+/* L60: */
+    }
+
+/*     workspace: N. */
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], 
+		lda, info);
+    } else if (iascl == 2) {
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], 
+		lda, info);
+    }
+    if (ibscl == 1) {
+	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+/*     L70 is also the target of the zero-matrix and zero-leading-entry early exits above */
+L70:
+    work[1] = (doublereal) lwkopt;
+
+    return 0;
+
+/*     End of DGELSY */
+
+} /* dgelsy_ */
+
diff --git a/lapack-netlib/SRC/dgemlq.c b/lapack-netlib/SRC/dgemlq.c
new file mode 100644
index 000000000..3baf41bd0
--- /dev/null
+++ b/lapack-netlib/SRC/dgemlq.c
@@ -0,0 +1,684 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Returns x**n for an integer exponent by binary (square-and-multiply) exponentiation. */
+	float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static double dpow_ui(double x, integer n) {
+	/* Returns x**n for an integer exponent by binary (square-and-multiply) exponentiation. */
+	double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Returns the complex power x**n for an integer exponent by binary exponentiation. */
+	_Complex float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Returns the complex power x**n for an integer exponent by binary exponentiation. */
+	_Complex double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; with a negative exponent the exact value is truncated toward zero. */
+	integer acc; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* |x| >= 2 truncates to 0; x == 0 still evaluates 1/x, exactly as before */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x == -1: continue with the positive exponent */
+	}
+	for (bits = n, acc = 1; ; ) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Returns the 1-based position, counted from s, of the first maximum of w[s-1..e-1]; n is unused. */
+	integer best = s, idx;
+	for (idx = s + 1; idx <= e; ++idx)
+		if (w[idx-1] > w[best-1]) best = idx;
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Returns the 1-based position, counted from s, of the first maximum of w[s-1..e-1]; n is unused. */
+	integer best = s, idx;
+	for (idx = s + 1; idx <= e; ++idx)
+		if (w[idx-1] > w[best-1]) best = idx;
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum(conj(x(i)) * y(i), i = 1..n) in *z; x and y are read with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum(conj(x(i)) * y(i), i = 1..n) in *z; x and y are read with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum(x(i) * y(i), i = 1..n) in *z, without conjugation; strides are incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum(x(i) * y(i), i = 1..n) in *z, without conjugation; strides are incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEMLQ */
+
+/*  Definition: */
+/*  =========== */
+
+/*      SUBROUTINE DGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, */
+/*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */
+
+
+/*      CHARACTER          SIDE, TRANS */
+/*      INTEGER            INFO, LDA, M, N, K, TSIZE, LWORK, LDC */
+/*      DOUBLE PRECISION   A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) */
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >     DGEMLQ overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                    SIDE = 'L'     SIDE = 'R' */
+/* >    TRANS = 'N':      Q * C          C * Q */
+/* >    TRANS = 'T':      Q**T * C       C * Q**T */
+/* >    where Q is a real orthogonal matrix defined as the product */
+/* >    of blocked elementary reflectors computed by short wide LQ */
+/* >    factorization (DGELQ) */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'T':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix C.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension */
+/* >                               (LDA,M) if SIDE = 'L', */
+/* >                               (LDA,N) if SIDE = 'R' */
+/* >          Part of the data structure to represent Q as returned by DGELQ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= max(1,K). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). */
+/* >          Part of the data structure to represent Q as returned by DGELQ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          The dimension of the array T. TSIZE >= 5. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >         (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1, then a workspace query is assumed. The routine */
+/* >          only calculates the size of the WORK array, returns this */
+/* >          value as WORK(1), and no error message related to WORK */
+/* >          is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           DLASWLQ or DGELQT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, DGELQ will use either */
+/* >  DLASWLQ (if the matrix is wide-and-short) or DGELQT to compute */
+/* >  the LQ factorization. */
+/* >  This version of DGEMLQ will use either DLAMSWLQ or DGEMLQT to */
+/* >  multiply matrix Q by another matrix. */
+/* >  Further Details in DLAMSWLQ or DGEMLQT. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgemlq_(char *side, char *trans, integer *m, integer *n, 
+	integer *k, doublereal *a, integer *lda, doublereal *t, integer *
+	tsize, doublereal *c__, integer *ldc, doublereal *work, integer *
+	lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1;
+
+    /* Local variables */
+    logical left, tran;
+    extern /* Subroutine */ int dlamswlq_(char *, char *, integer *, integer *
+	    , integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, integer *);
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer mb, nb, mn, lw, nblcks;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical notran, lquery;
+    extern /* Subroutine */ int dgemlqt_(char *, char *, integer *, integer *,
+	     integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *);
+/*  Applies Q or Q**T, as represented by A and T, to the M-by-N matrix C by */
+/*  dispatching to DGEMLQT or DLAMSWLQ.  Always returns 0; status is in INFO. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*  INFO = 0 on success, -i if argument i is invalid (reported via XERBLA). */
+/*  LWORK = -1 is a workspace query: WORK(1) is set and nothing is computed. */
+/* ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    lquery = *lwork == -1;
+    notran = lsame_(trans, "N");
+    tran = lsame_(trans, "T");
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+/*     Block sizes come from T(2) and T(3); LW is the minimum workspace length */
+    mb = (integer) t[2];
+    nb = (integer) t[3];
+    if (left) {
+	lw = *n * mb;
+	mn = *m;
+    } else {
+	lw = *m * mb;
+	mn = *n;
+    }
+/*     NOTE: NBLCKS is computed here but not referenced again in this routine */
+    if (nb > *k && mn > *k) {
+	if ((mn - *k) % (nb - *k) == 0) {
+	    nblcks = (mn - *k) / (nb - *k);
+	} else {
+	    nblcks = (mn - *k) / (nb - *k) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+    *info = 0;
+    if (! left && ! right) {
+	*info = -1;
+    } else if (! tran && ! notran) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*k < 0 || *k > mn) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,*k)) {
+	*info = -7;
+    } else if (*tsize < 5) {
+	*info = -9;
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -11;
+    } else if (*lwork < f2cmax(1,lw) && ! lquery) {
+	*info = -13;
+    }
+/*     Report max(1,LW): the LWORK test above rejects any smaller value */
+    if (*info == 0) {
+	work[1] = (doublereal) f2cmax(1,lw);
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEMLQ", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*k) == 0) {
+	return 0;
+    }
+
+/* Computing MAX */
+    i__1 = f2cmax(*m,*n);
+    if (left && *m <= *k || right && *n <= *k || nb <= *k || nb >= f2cmax(i__1,*
+	    k)) {
+	dgemlqt_(side, trans, m, n, k, &mb, &a[a_offset], lda, &t[6], &mb, &
+		c__[c_offset], ldc, &work[1], info);
+    } else {
+	dlamswlq_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
+		mb, &c__[c_offset], ldc, &work[1], lwork, info);
+    }
+/*     Same clamped size as reported for a workspace query */
+    work[1] = (doublereal) f2cmax(1,lw);
+
+    return 0;
+
+/*     End of DGEMLQ */
+
+} /* dgemlq_ */
+
diff --git a/lapack-netlib/SRC/dgemlqt.c b/lapack-netlib/SRC/dgemlqt.c
new file mode 100644
index 000000000..b924652e8
--- /dev/null
+++ b/lapack-netlib/SRC/dgemlqt.c
@@ -0,0 +1,707 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Returns x**n for an integer exponent by binary (square-and-multiply) exponentiation. */
+	float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static double dpow_ui(double x, integer n) {
+	/* Returns x**n for an integer exponent by binary (square-and-multiply) exponentiation. */
+	double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Returns the complex power x**n for an integer exponent by binary exponentiation. */
+	_Complex float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Returns the complex power x**n for an integer exponent by binary exponentiation. */
+	_Complex double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base, use |n| */
+	for (bits = n; ; ) {
+		if (bits & 01) acc *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; with a negative exponent the exact value is truncated toward zero. */
+	integer acc; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* |x| >= 2 truncates to 0; x == 0 still evaluates 1/x, exactly as before */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x == -1: continue with the positive exponent */
+	}
+	for (bits = n, acc = 1; ; ) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Returns the 1-based position, counted from s, of the first maximum of w[s-1..e-1]; n is unused. */
+	integer best = s, idx;
+	for (idx = s + 1; idx <= e; ++idx)
+		if (w[idx-1] > w[best-1]) best = idx;
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Returns the 1-based position, counted from s, of the first maximum of w[s-1..e-1]; n is unused. */
+	integer best = s, idx;
+	for (idx = s + 1; idx <= e; ++idx)
+		if (w[idx-1] > w[best-1]) best = idx;
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum(conj(x(i)) * y(i), i = 1..n) in *z; x and y are read with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum(conj(x(i)) * y(i), i = 1..n) in *z; x and y are read with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores sum(x(i) * y(i), i = 1..n) in *z, without conjugation; strides are incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores sum(x(i) * y(i), i = 1..n) in *z, without conjugation; strides are incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative stride traverses the vector backwards, starting at its last element. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEMLQT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEMLQT + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgemlqt
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgemlqt
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgemlqt
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, */
+/*                          C, LDC, WORK, INFO ) */
+
+/*       CHARACTER SIDE, TRANS */
+/*       INTEGER   INFO, K, LDV, LDC, M, N, MB, LDT */
+/*       DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEMLQT overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                 SIDE = 'L'     SIDE = 'R' */
+/* > TRANS = 'N':      Q C            C Q */
+/* > TRANS = 'T':   Q**T C            C Q**T */
+/* > */
+/* > where Q is a real orthogonal matrix defined as the product of K */
+/* > elementary reflectors: */
+/* > */
+/* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
+/* > */
+/* > generated using the compact WY representation as returned by DGELQT. */
+/* > */
+/* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'T':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix C. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MB */
+/* > \verbatim */
+/* >          MB is INTEGER */
+/* >          The block size used for the storage of T.  K >= MB >= 1. */
+/* >          This must be the same value of MB used to generate T */
+/* >          in DGELQT. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is DOUBLE PRECISION array, dimension */
+/* >                               (LDV,M) if SIDE = 'L', */
+/* >                               (LDV,N) if SIDE = 'R' */
+/* >          The i-th row must contain the vector which defines the */
+/* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
+/* >          DGELQT in the first K rows of its array argument A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. LDV >= f2cmax(1,K). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,K) */
+/* >          The upper triangular factors of the block reflectors */
+/* >          as returned by DGELQT, stored as a MB-by-K matrix. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= MB. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array. The dimension of */
+/* >          WORK is N*MB if SIDE = 'L', or  M*MB if SIDE = 'R'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgemlqt_(char *side, char *trans, integer *m, integer *n,
+	 integer *k, integer *mb, doublereal *v, integer *ldv, doublereal *t, 
+	integer *ldt, doublereal *c__, integer *ldc, doublereal *work, 
+	integer *info)
+{
+    /* System generated locals: leading dimensions, index offsets and loop temporaries */
+    integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, 
+	    i__3, i__4;
+
+    /* Local variables */
+    logical left, tran;
+    integer i__;
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer ib, kf;
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), xerbla_(char *, 
+	    integer *, ftnlen);
+    logical notran;
+    integer ldwork;
+
+
+/*  -- LAPACK computational routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+
+
+/*  ===================================================================== */
+/*  Validates the arguments, then updates C in place with one DLARFB call */
+/*  per block of at most MB reflectors; SIDE and TRANS select one of four */
+/*  sweeps.  Always returns 0; errors are reported via INFO and XERBLA.   */
+    /* Parameter adjustments: bias each pointer so the Fortran 1-based indices can be used below */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+    tran = lsame_(trans, "T");
+    notran = lsame_(trans, "N");
+    /* ldwork is the leading dimension passed to DLARFB for WORK; it stays unset for an invalid SIDE, which returns below before any use */
+    if (left) {
+	ldwork = f2cmax(1,*n);
+    } else if (right) {
+	ldwork = f2cmax(1,*m);
+    }
+    if (! left && ! right) {
+	*info = -1;
+    } else if (! tran && ! notran) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*k < 0) {
+	*info = -5;
+    } else if (*mb < 1 || *mb > *k && *k > 0) {
+	*info = -6;
+    } else if (*ldv < f2cmax(1,*k)) {
+	*info = -8;
+    } else if (*ldt < *mb) {
+	*info = -10;
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -12;
+    }
+    /* Only the first failing check is recorded; XERBLA receives its argument position and C is not touched */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEMLQT", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    /* Quick return: nothing to do when M, N or K is zero */
+    if (*m == 0 || *n == 0 || *k == 0) {
+	return 0;
+    }
+
+    if (left && notran) {
+	/* Forward sweep i = 1, 1+MB, ... <= K on rows i..M of C (the loop test is f2c's sign-generic DO form) */
+	i__1 = *k;
+	i__2 = *mb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = *mb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4);
+	    i__3 = *m - i__ + 1;
+	    dlarfb_("L", "T", "F", "R", &i__3, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork);
+	}
+
+    } else if (right && tran) {
+	/* Forward sweep i = 1, 1+MB, ... <= K on columns i..N of C */
+	i__2 = *k;
+	i__1 = *mb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+	    i__3 = *mb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4);
+	    i__3 = *n - i__ + 1;
+	    dlarfb_("R", "N", "F", "R", m, &i__3, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork);
+	}
+
+    } else if (left && tran) {
+	/* Backward sweep: kf is the first index of the last block; step by -MB down to 1, on rows i..M of C */
+	kf = (*k - 1) / *mb * *mb + 1;
+	i__1 = -(*mb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *mb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3);
+	    i__2 = *m - i__ + 1;
+	    dlarfb_("L", "N", "F", "R", &i__2, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork);
+	}
+
+    } else if (right && notran) {
+	/* Backward sweep from kf down to 1 in steps of MB, on columns i..N of C */
+	kf = (*k - 1) / *mb * *mb + 1;
+	i__1 = -(*mb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *mb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3);
+	    i__2 = *n - i__ + 1;
+	    dlarfb_("R", "T", "F", "R", m, &i__2, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork);
+	}
+
+    }
+
+    return 0;
+
+/*     End of DGEMLQT */
+
+} /* dgemlqt_ */
+
diff --git a/lapack-netlib/SRC/dgemqr.c b/lapack-netlib/SRC/dgemqr.c
new file mode 100644
index 000000000..43d9286ad
--- /dev/null
+++ b/lapack-netlib/SRC/dgemqr.c
@@ -0,0 +1,685 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* cpow() takes two complex values, so the exponent is converted with Cd() like the base */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w)) /* parenthesised, not braced, so the macro can be used inside an expression */
+#define myhuge(w) (HUGE_VAL)
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n for float x by binary exponentiation; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0;
+	/* |n| is formed in unsigned arithmetic because negating the most negative integer overflows. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 01) result *= x;
+		if (bits > 1) x *= x; /* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* x**n for double x by binary exponentiation; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic because negating the most negative integer overflows. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 01) result *= x;
+		if (bits > 1) x *= x; /* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* x**n for single-precision complex x by binary exponentiation; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic because negating the most negative integer overflows. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 01) result *= x;
+		if (bits > 1) x *= x; /* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* x**n for double-precision complex x by binary exponentiation; a negative n yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic because negating the most negative integer overflows. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 01) result *= x;
+		if (bits > 1) x *= x; /* no squaring once the top bit has been consumed */
+	}
+	return result;
+}
+/* Integer power x**n by binary exponentiation, with special handling of n <= 0. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* Any base other than -1 gives 0 here, except x == 0, which evaluates 1/x as before. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n; /* x == -1: carry on with the negated exponent */
+	}
+	bits = n;
+	while (1) {
+		if (bits & 01) result *= x;
+		if ((bits >>= 1) == 0) break;
+		x *= x;
+	}
+	return result;
+}
+/* Position (1-based, counted from s) of the first largest entry of w(s..e); n is not referenced. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	double top = w[s-1]; integer best = s, pos;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > top) { best = pos; top = w[pos-1]; }
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest entry of w(s..e); n is not referenced. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	float top = w[s-1]; integer best = s, pos;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > top) { best = pos; top = w[pos-1]; }
+	return best - s + 1;
+}
+/* Conjugated complex dot product: stores sum(conjg(x(i)) * y(i)), i = 1..*n_, into *z. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: with a negative increment the vector is */
+	/* traversed backwards, so the first element used sits at (1-n)*inc.  */
+	/* Indexing i*inc from 0 would read in front of the array instead.    */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]); /* acc = acc + conjg(x(i)) * y(i) */
+	}
+	pCf(z) = acc;
+}
+/* Conjugated complex dot product: stores sum(dconjg(x(i)) * y(i)), i = 1..*n_, into *z. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: with a negative increment the vector is */
+	/* traversed backwards, so the first element used sits at (1-n)*inc.  */
+	/* Indexing i*inc from 0 would read in front of the array instead.    */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]); /* acc = acc + dconjg(x(i)) * y(i) */
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated complex dot product: stores sum(x(i) * y(i)), i = 1..*n_, into *z. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: with a negative increment the vector is */
+	/* traversed backwards, so the first element used sits at (1-n)*inc.  */
+	/* Indexing i*inc from 0 would read in front of the array instead.    */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]); /* acc = acc + x(i) * y(i) */
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated complex dot product: stores sum(x(i) * y(i)), i = 1..*n_, into *z. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: with a negative increment the vector is */
+	/* traversed backwards, so the first element used sits at (1-n)*inc.  */
+	/* Indexing i*inc from 0 would read in front of the array instead.    */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]); /* acc = acc + x(i) * y(i) */
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEMQR */
+
+/*  Definition: */
+/*  =========== */
+
+/*      SUBROUTINE DGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, */
+/*     $                   TSIZE, C, LDC, WORK, LWORK, INFO ) */
+
+
+/*     CHARACTER         SIDE, TRANS */
+/*     INTEGER           INFO, LDA, M, N, K, TSIZE, LWORK, LDC */
+/*     DOUBLE PRECISION  A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) */
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEMQR overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                      SIDE = 'L'     SIDE = 'R' */
+/* >      TRANS = 'N':      Q * C          C * Q */
+/* >      TRANS = 'T':      Q**T * C       C * Q**T */
+/* > */
+/* > where Q is a real orthogonal matrix defined as the product */
+/* > of blocked elementary reflectors computed by tall skinny */
+/* > QR factorization (DGEQR) */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'T':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix C.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,K) */
+/* >          Part of the data structure to represent Q as returned by DGEQR. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. */
+/* >          If SIDE = 'L', LDA >= f2cmax(1,M); */
+/* >          if SIDE = 'R', LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). */
+/* >          Part of the data structure to represent Q as returned by DGEQR. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          The dimension of the array T. TSIZE >= 5. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >         (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1, then a workspace query is assumed. The routine */
+/* >          only calculates the size of the WORK array, returns this */
+/* >          value as WORK(1), and no error message related to WORK */
+/* >          is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           DLATSQR or DGEQRT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, DGEQR will use either */
+/* >  DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute */
+/* >  the QR factorization. */
+/* >  This version of DGEMQR will use either DLAMTSQR or DGEMQRT to */
+/* >  multiply matrix Q by another matrix. */
+/* >  Further Details in DLAMTSQR or DGEMQRT. */
+/* > */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgemqr_(char *side, char *trans, integer *m, integer *n, 
+	integer *k, doublereal *a, integer *lda, doublereal *t, integer *
+	tsize, doublereal *c__, integer *ldc, doublereal *work, integer *
+	lwork, integer *info)
+{
+    /* System generated locals: leading dimensions, index offsets and a scratch integer */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1;
+
+    /* Local variables */
+    logical left, tran;
+    extern /* Subroutine */ int dlamtsqr_(char *, char *, integer *, integer *
+	    , integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, integer *);
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer mb, nb, mn, lw, nblcks;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    logical notran, lquery;
+    extern /* Subroutine */ int dgemqrt_(char *, char *, integer *, integer *,
+	     integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/* ===================================================================== */
+/* Validates the arguments, answers workspace queries (LWORK = -1) and then */
+/* forwards the update of C to DGEMQRT or DLAMTSQR.  Always returns 0.      */
+/*     Test the input arguments */
+
+    /* Parameter adjustments: bias each pointer so the Fortran 1-based indices can be used below */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    lquery = *lwork == -1;
+    notran = lsame_(trans, "N");
+    tran = lsame_(trans, "T");
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+    /* Block sizes are stored as floating-point values in T(2) and T(3); they are read here, before TSIZE is validated below */
+    mb = (integer) t[2];
+    nb = (integer) t[3];
+    if (left) {
+	lw = *n * nb;
+	mn = *m;
+    } else {
+	lw = mb * nb;
+	mn = *n;
+    }
+    /* nblcks = ceil((mn - k) / (mb - k)) when both differences are positive, else 1; it is not read again in this routine */
+    if (mb > *k && mn > *k) {
+	if ((mn - *k) % (mb - *k) == 0) {
+	    nblcks = (mn - *k) / (mb - *k);
+	} else {
+	    nblcks = (mn - *k) / (mb - *k) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+    *info = 0;
+    if (! left && ! right) {
+	*info = -1;
+    } else if (! tran && ! notran) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*k < 0 || *k > mn) {
+	*info = -5;
+    } else if (*lda < f2cmax(1,mn)) {
+	*info = -7;
+    } else if (*tsize < 5) {
+	*info = -9;
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -11;
+    } else if (*lwork < f2cmax(1,lw) && ! lquery) {
+	*info = -13;
+    }
+    /* With valid arguments WORK(1) receives the workspace size lw; this is also the answer to a workspace query */
+    if (*info == 0) {
+	work[1] = (doublereal) lw;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEMQR", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*k) == 0) {
+	return 0;
+    }
+    /* DGEMQRT is used when mn <= K, MB <= K or MB >= max(M,N,K); otherwise DLAMTSQR. Both receive T starting at T(6) with NB as its leading dimension */
+/* Computing MAX */
+    i__1 = f2cmax(*m,*n);
+    if (left && *m <= *k || right && *n <= *k || mb <= *k || mb >= f2cmax(i__1,*
+	    k)) {
+	dgemqrt_(side, trans, m, n, k, &nb, &a[a_offset], lda, &t[6], &nb, &
+		c__[c_offset], ldc, &work[1], info);
+    } else {
+	dlamtsqr_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], &
+		nb, &c__[c_offset], ldc, &work[1], lwork, info);
+    }
+    /* WORK(1) is set to lw again after the callee has used the workspace */
+    work[1] = (doublereal) lw;
+
+    return 0;
+
+/*     End of DGEMQR */
+
+} /* dgemqr_ */
+
diff --git a/lapack-netlib/SRC/dgemqrt.c b/lapack-netlib/SRC/dgemqrt.c
new file mode 100644
index 000000000..906dfbd68
--- /dev/null
+++ b/lapack-netlib/SRC/dgemqrt.c
@@ -0,0 +1,708 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* build a C99 float complex value from the f2c struct's r and i fields */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* same as Cf, for the double-precision struct */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* reinterpret the struct pointer as a pointer to C99 float complex; pCf(z) dereferences it for assignment */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same as _pCf, for the double-precision struct; pCd(z) dereferences it */
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x raised to the integer power n, computed in float */
+	float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* returns x raised to the integer power n, computed in double */
+	double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns the float complex x raised to the integer power n */
+	_Complex float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns the double complex x raised to the integer power n */
+	_Complex double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* x**n in integer arithmetic; a negative n with |x| > 1 yields 0 */
+	integer pow; unsigned long int u; /* pow is assigned on every path that reaches the return: either in the n <= 0 block or by the loop */
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* NOTE(review): for x == 0 this evaluates 1/x, an integer division by zero -- looks deliberate (0 to a negative power), confirm */
+		else n = -n; /* x == -1: flip the sign of n and let the loop below compute the result */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* false exactly when pow was already set in the block above */
+		u = n;
+		for(pow = 1; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position (1-based, counted from s) of the largest of w[s-1..e-1]; n is not used */
+{
+	double m; integer i, mi; /* m: largest value seen so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* start from element s, scan up to and including element e */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1; /* convert the index in w to an offset within the s..e window */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* position (1-based, counted from s) of the largest of w[s-1..e-1]; n is not used */
+{
+	float m; integer i, mi; /* m: largest value seen so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* start from element s, scan up to and including element e */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1; /* convert the index in w to an offset within the s..e window */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy] for i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i*incx)) * y(i*incy) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* store the sum through the result pointer */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy] for i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i*incx)) * y(i*incy) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* store the sum through the result pointer */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy] for i = 0..n-1, no conjugation */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i*incx) * y(i*incy) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* store the sum through the result pointer */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy] for i = 0..n-1, no conjugation */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i*incx) * y(i*incy) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* store the sum through the result pointer */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEMQRT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEMQRT + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgemqrt
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgemqrt
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgemqrt
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, */
+/*                          C, LDC, WORK, INFO ) */
+
+/*       CHARACTER SIDE, TRANS */
+/*       INTEGER   INFO, K, LDV, LDC, M, N, NB, LDT */
+/*       DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEMQRT overwrites the general real M-by-N matrix C with */
+/* > */
+/* >                 SIDE = 'L'     SIDE = 'R' */
+/* > TRANS = 'N':      Q C            C Q */
+/* > TRANS = 'T':   Q**T C            C Q**T */
+/* > */
+/* > where Q is a real orthogonal matrix defined as the product of K */
+/* > elementary reflectors: */
+/* > */
+/* >       Q = H(1) H(2) . . . H(K) = I - V T V**T */
+/* > */
+/* > generated using the compact WY representation as returned by DGEQRT. */
+/* > */
+/* > Q is of order M if SIDE = 'L' and of order N  if SIDE = 'R'. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] SIDE */
+/* > \verbatim */
+/* >          SIDE is CHARACTER*1 */
+/* >          = 'L': apply Q or Q**T from the Left; */
+/* >          = 'R': apply Q or Q**T from the Right. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N':  No transpose, apply Q; */
+/* >          = 'T':  Transpose, apply Q**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix C. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix C. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] K */
+/* > \verbatim */
+/* >          K is INTEGER */
+/* >          The number of elementary reflectors whose product defines */
+/* >          the matrix Q. */
+/* >          If SIDE = 'L', M >= K >= 0; */
+/* >          if SIDE = 'R', N >= K >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NB */
+/* > \verbatim */
+/* >          NB is INTEGER */
+/* >          The block size used for the storage of T.  K >= NB >= 1. */
+/* >          This must be the same value of NB used to generate T */
+/* >          in DGEQRT. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] V */
+/* > \verbatim */
+/* >          V is DOUBLE PRECISION array, dimension (LDV,K) */
+/* >          The i-th column must contain the vector which defines the */
+/* >          elementary reflector H(i), for i = 1,2,...,k, as returned by */
+/* >          DGEQRT in the first K columns of its array argument A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If SIDE = 'L', LDV >= f2cmax(1,M); */
+/* >          if SIDE = 'R', LDV >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,K) */
+/* >          The upper triangular factors of the block reflectors */
+/* >          as returned by DGEQRT, stored as a NB-by-K matrix. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= NB. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (LDC,N) */
+/* >          On entry, the M-by-N matrix C. */
+/* >          On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDC */
+/* > \verbatim */
+/* >          LDC is INTEGER */
+/* >          The leading dimension of the array C. LDC >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array. The dimension of */
+/* >          WORK is N*NB if SIDE = 'L', or  M*NB if SIDE = 'R'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine: overwrite C with Q*C, Q**T*C, C*Q or C*Q**T, applying the reflectors in V block by block */ int dgemqrt_(char *side, char *trans, integer *m, integer *n,
+	 integer *k, integer *nb, doublereal *v, integer *ldv, doublereal *t, 
+	integer *ldt, doublereal *c__, integer *ldc, doublereal *work, 
+	integer *info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, 
+	    i__3, i__4;
+
+    /* Local variables */
+    logical left, tran;
+    integer i__, q;
+    extern logical lsame_(char *, char *);
+    logical right;
+    integer ib, kf;
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), xerbla_(char *, integer *, ftnlen);
+    logical notran;
+    integer ldwork;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+/*  Always returns 0; the outcome is reported through *info (0 on success, */
+/*  -i when the i-th argument is rejected). Work is done by DLARFB on */
+/*  blocks of at most NB reflectors, in the order each SIDE/TRANS case needs. */
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset; /* from here on v[i + j * v_dim1] addresses Fortran V(i,j) */
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset; /* same 1-based column-major shift for T */
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1 * 1;
+    c__ -= c_offset; /* same 1-based column-major shift for C */
+    --work; /* work[1] is the caller's first workspace element */
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    right = lsame_(side, "R");
+    tran = lsame_(trans, "T");
+    notran = lsame_(trans, "N");
+/*     q is the order of Q (M from the left, N from the right); ldwork is the WORK leading dimension handed to DLARFB */
+    if (left) {
+	ldwork = f2cmax(1,*n);
+	q = *m;
+    } else if (right) {
+	ldwork = f2cmax(1,*m);
+	q = *n;
+    } /* neither: q and ldwork stay unset, but the -1 branch below fires before either is read */
+    if (! left && ! right) {
+	*info = -1; /* SIDE */
+    } else if (! tran && ! notran) {
+	*info = -2; /* TRANS */
+    } else if (*m < 0) {
+	*info = -3; /* M */
+    } else if (*n < 0) {
+	*info = -4; /* N */
+    } else if (*k < 0 || *k > q) {
+	*info = -5; /* K */
+    } else if (*nb < 1 || *nb > *k && *k > 0) {
+	*info = -6; /* NB; the upper bound is only enforced when K > 0 */
+    } else if (*ldv < f2cmax(1,q)) {
+	*info = -8; /* LDV */
+    } else if (*ldt < *nb) {
+	*info = -10; /* LDT */
+    } else if (*ldc < f2cmax(1,*m)) {
+	*info = -12; /* LDC */
+    }
+/*     Report the first rejected argument through XERBLA and return before C is touched */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEMQRT", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return: nothing to do for an empty C or an empty set of reflectors */
+    if (*m == 0 || *n == 0 || *k == 0) {
+	return 0;
+    }
+
+    if (left && tran) {
+/*	Q**T * C: visit the blocks forward, i = 1, 1+NB, ... <= K. NB >= 1 was checked above, so the sign test in the loop header always takes the "<=" arm. */
+	i__1 = *k;
+	i__2 = *nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = *nb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4); /* block width: NB, or what is left of K for the last block */
+	    i__3 = *m - i__ + 1; /* row count passed to DLARFB: rows i..M of C */
+	    dlarfb_("L", "T", "F", "C", &i__3, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork); /* operands start at V(i,i), T(1,i) and C(i,1) */
+	}
+
+    } else if (right && notran) {
+/*	C * Q: forward over the blocks as above, here restricting C to columns i..N */
+	i__2 = *k;
+	i__1 = *nb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+	    i__3 = *nb, i__4 = *k - i__ + 1;
+	    ib = f2cmin(i__3,i__4); /* block width: NB, or what is left of K for the last block */
+	    i__3 = *n - i__ + 1; /* column count passed to DLARFB: columns i..N of C */
+	    dlarfb_("R", "N", "F", "C", m, &i__3, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork); /* operands start at V(i,i), T(1,i) and C(1,i) */
+	}
+
+    } else if (left && notran) {
+/*	Q * C: visit the blocks backward, from kf down to 1 in steps of NB; the step is negative, so the ">=" arm of the loop test applies */
+	kf = (*k - 1) / *nb * *nb + 1; /* first reflector index of the last block */
+	i__1 = -(*nb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *nb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3); /* block width: NB, or what is left of K for the block starting at kf */
+	    i__2 = *m - i__ + 1; /* row count passed to DLARFB: rows i..M of C */
+	    dlarfb_("L", "N", "F", "C", &i__2, n, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, 
+		    &work[1], &ldwork); /* operands start at V(i,i), T(1,i) and C(i,1) */
+	}
+
+    } else if (right && tran) {
+/*	C * Q**T: backward over the blocks as in the Q * C case, here restricting C to columns i..N */
+	kf = (*k - 1) / *nb * *nb + 1; /* first reflector index of the last block */
+	i__1 = -(*nb);
+	for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = *nb, i__3 = *k - i__ + 1;
+	    ib = f2cmin(i__2,i__3); /* block width: NB, or what is left of K for the block starting at kf */
+	    i__2 = *n - i__ + 1; /* column count passed to DLARFB: columns i..N of C */
+	    dlarfb_("R", "T", "F", "C", m, &i__2, &ib, &v[i__ + i__ * v_dim1],
+		     ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], 
+		    ldc, &work[1], &ldwork); /* operands start at V(i,i), T(1,i) and C(1,i) */
+	}
+
+    }
+
+    return 0;
+
+/*     End of DGEMQRT */
+
+} /* dgemqrt_ */
+
diff --git a/lapack-netlib/SRC/dgeql2.c b/lapack-netlib/SRC/dgeql2.c
new file mode 100644
index 000000000..040cc1916
--- /dev/null
+++ b/lapack-netlib/SRC/dgeql2.c
@@ -0,0 +1,591 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* build a C99 float complex value from the f2c struct's r and i fields */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* same as Cf, for the double-precision struct */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* reinterpret the struct pointer as a pointer to C99 float complex; pCf(z) dereferences it for assignment */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same as _pCf, for the double-precision struct; pCd(z) dereferences it */
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x raised to the integer power n, computed in float */
+	float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* returns x raised to the integer power n, computed in double */
+	double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns the float complex x raised to the integer power n */
+	_Complex float pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns the double complex x raised to the integer power n */
+	_Complex double pow=1.0; unsigned long int u; /* pow: running product; u: exponent bits still to be consumed */
+	if(n != 0) { /* n == 0 skips everything and returns 1.0 */
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: invert the base and use -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* x**n in integer arithmetic; a negative n with |x| > 1 yields 0 */
+	integer pow; unsigned long int u; /* pow is assigned on every path that reaches the return: either in the n <= 0 block or by the loop */
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* NOTE(review): for x == 0 this evaluates 1/x, an integer division by zero -- looks deliberate (0 to a negative power), confirm */
+		else n = -n; /* x == -1: flip the sign of n and let the loop below compute the result */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* false exactly when pow was already set in the block above */
+		u = n;
+		for(pow = 1; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x; /* current bit set: fold this power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position (1-based, counted from s) of the largest of w[s-1..e-1]; n is not used */
+{
+	double m; integer i, mi; /* m: largest value seen so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* start from element s, scan up to and including element e */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1; /* convert the index in w to an offset within the s..e window */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* position (1-based, counted from s) of the largest of w[s-1..e-1]; n is not used */
+{
+	float m; integer i, mi; /* m: largest value seen so far; mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* start from element s, scan up to and including element e */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the earliest of equal maxima */
+	return mi-s+1; /* convert the index in w to an offset within the s..e window */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy] for i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i*incx)) * y(i*incy) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* store the sum through the result pointer */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of conj(x[i*incx]) * y[i*incy] for i = 0..n-1 */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i*incx)) * y(i*incy) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* store the sum through the result pointer */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy] for i = 0..n-1, no conjugation */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i*incx) * y(i*incy) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* store the sum through the result pointer */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of x[i*incx] * y[i*incy] for i = 0..n-1, no conjugation */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both strides are 1: index the vectors directly */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment indexes before x or y here -- confirm callers never pass one */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i*incx) * y(i*incy) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* store the sum through the result pointer */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b DGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorit
+hm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQL2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeql2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeql2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeql2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQL2 computes a QL factorization of a real m by n matrix A: */
+/* > A = Q * L. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, if m >= n, the lower triangle of the subarray */
+/* >          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */
+/* >          if m <= n, the elements on and below the (n-m)-th */
+/* >          superdiagonal contain the m by n lower trapezoidal matrix L; */
+/* >          the remaining elements, with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of elementary reflectors */
+/* >          (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
+/* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeql2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* External routines called below */
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *), dlarfg_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+
+    /* Local variables */
+    integer ld;            /* leading dimension of a, copied from *lda */
+    integer nrefl;         /* number of reflectors, min(*m,*n) */
+    integer step;          /* reflector index, counts from nrefl down to 1 */
+    integer piv_row;       /* 1-based row of the pivot entry; also the reflector length */
+    integer piv_col;       /* 1-based column being reduced */
+    integer left_cols;     /* number of columns to the left of piv_col */
+    integer err_pos;       /* argument position handed to xerbla_ */
+    doublereal *col_top;   /* address of A(1,piv_col) */
+    doublereal *pivot;     /* address of A(piv_row,piv_col) */
+    doublereal pivot_val;  /* pivot entry saved while it is temporarily set to 1 */
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+/*     Arrays are addressed with 0-based offsets in this routine: */
+/*     Fortran A(i,j) is a[(i-1) + (j-1)*ld] and TAU(i) is tau[i-1]. */
+
+    ld = *lda;
+
+/*     Test the input arguments; only the first offending one is reported */
+
+    *info = 0;
+    if (*m < 0) *info = -1;
+    else if (*n < 0) *info = -2;
+    else if (*lda < f2cmax(1,*m)) *info = -4;
+
+    if (*info != 0) {
+	err_pos = -(*info);
+	xerbla_("DGEQL2", &err_pos, (ftnlen)6);
+	return 0;
+    }
+
+    nrefl = f2cmin(*m,*n);
+
+    for (step = nrefl; step >= 1; --step) {
+	piv_row = *m - nrefl + step;
+	piv_col = *n - nrefl + step;
+	col_top = a + (piv_col - 1) * ld;
+	pivot = col_top + (piv_row - 1);
+
+/*        Generate elementary reflector H(step) to annihilate */
+/*        A(1:piv_row-1,piv_col) */
+
+	dlarfg_(&piv_row, pivot, col_top, &c__1, &tau[step - 1]);
+
+/*        Apply H(step) to A(1:piv_row,1:piv_col-1) from the left; the */
+/*        pivot entry holds 1 during the call and is restored afterwards */
+
+	pivot_val = *pivot;
+	*pivot = 1.;
+	left_cols = piv_col - 1;
+	dlarf_("Left", &piv_row, &left_cols, col_top, &c__1, &tau[step - 1],
+		a, lda, work);
+	*pivot = pivot_val;
+    }
+    return 0;
+
+/*     End of DGEQL2 */
+
+} /* dgeql2_ */
+
diff --git a/lapack-netlib/SRC/dgeqlf.c b/lapack-netlib/SRC/dgeqlf.c
new file mode 100644
index 000000000..aa01ef1c0
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqlf.c
@@ -0,0 +1,711 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* x**n by repeated squaring; a negative n inverts x first */
+	float result = 1.0; unsigned long int bits;
+	if (n == 0) return result;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {	/* x is squared only while higher bits remain */
+		if (bits & 01) result *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* x**n by repeated squaring; a negative n inverts x first */
+	double result = 1.0; unsigned long int bits;
+	if (n == 0) return result;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {	/* x is squared only while higher bits remain */
+		if (bits & 01) result *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* complex x**n by repeated squaring; a negative n inverts x first */
+	_Complex float result = 1.0; unsigned long int bits;
+	if (n == 0) return result;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {	/* x is squared only while higher bits remain */
+		if (bits & 01) result *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* complex x**n by repeated squaring; a negative n inverts x first */
+	_Complex double result = 1.0; unsigned long int bits;
+	if (n == 0) return result;
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {	/* x is squared only while higher bits remain */
+		if (bits & 01) result *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; for n < 0 the result is the truncated integer value. */
+	integer result = 1; unsigned long int bits;
+	if (n == 0 || (n < 0 && x == 1)) return 1;
+	if (n < 0) {
+		/* x == 0 evaluates 1/x (integer division by zero, kept as is); other x give 0 */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x == -1: the power loop below yields +1 or -1 */
+	}
+	/* repeated squaring over the bits of n */
+	for (bits = n; ; x *= x) {
+		if (bits & 01) result *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is unused */
+	double best = w[s-1]; integer pos, best_pos = s;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > best) { best_pos = pos; best = w[pos-1]; }
+	return best_pos - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is unused */
+	float best = w[s-1]; integer pos, best_pos = s;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > best) { best_pos = pos; best = w[pos-1]; }
+	return best_pos - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conj(x(k))*y(k) for k = 0 .. *n_-1. */
+	const integer count = *n_, step_x = *incx_, step_y = *incy_;
+	_Complex float acc = 0.0;
+	integer k;
+	if (step_x != 1 || step_y != 1) {
+		for (k = 0; k < count; ++k)	/* element k is read at x[k*step_x], y[k*step_y] */
+			acc += conjf(Cf(&x[k*step_x])) * Cf(&y[k*step_y]);
+	} else {
+		for (k = 0; k < count; ++k)	/* both vectors contiguous */
+			acc += conjf(Cf(&x[k])) * Cf(&y[k]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conj(x(k))*y(k) for k = 0 .. *n_-1. */
+	const integer count = *n_, step_x = *incx_, step_y = *incy_;
+	_Complex double acc = 0.0;
+	integer k;
+	if (step_x != 1 || step_y != 1) {
+		for (k = 0; k < count; ++k)	/* element k is read at x[k*step_x], y[k*step_y] */
+			acc += conj(Cd(&x[k*step_x])) * Cd(&y[k*step_y]);
+	} else {
+		for (k = 0; k < count; ++k)	/* both vectors contiguous */
+			acc += conj(Cd(&x[k])) * Cd(&y[k]);
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(k)*y(k) for k = 0 .. *n_-1; x is NOT conjugated. */
+	const integer count = *n_, step_x = *incx_, step_y = *incy_;
+	_Complex float acc = 0.0;
+	integer k;
+	if (step_x != 1 || step_y != 1) {
+		for (k = 0; k < count; ++k)	/* element k is read at x[k*step_x], y[k*step_y] */
+			acc += Cf(&x[k*step_x]) * Cf(&y[k*step_y]);
+	} else {
+		for (k = 0; k < count; ++k)	/* both vectors contiguous */
+			acc += Cf(&x[k]) * Cf(&y[k]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(k)*y(k) for k = 0 .. *n_-1; x is NOT conjugated. */
+	const integer count = *n_, step_x = *incx_, step_y = *incy_;
+	_Complex double acc = 0.0;
+	integer k;
+	if (step_x != 1 || step_y != 1) {
+		for (k = 0; k < count; ++k)	/* element k is read at x[k*step_x], y[k*step_y] */
+			acc += Cd(&x[k*step_x]) * Cd(&y[k*step_y]);
+	} else {
+		for (k = 0; k < count; ++k)	/* both vectors contiguous */
+			acc += Cd(&x[k]) * Cd(&y[k]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b DGEQLF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQLF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqlf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqlf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqlf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQLF computes a QL factorization of a real M-by-N matrix A: */
+/* > A = Q * L. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if m >= n, the lower triangle of the subarray */
+/* >          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */
+/* >          if m <= n, the elements on and below the (n-m)-th */
+/* >          superdiagonal contain the M-by-N lower trapezoidal matrix L; */
+/* >          the remaining elements, with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of elementary reflectors */
+/* >          (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,N). */
+/* >          For optimum performance LWORK >= N*NB, where NB is the */
+/* >          optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(k) . . . H(2) H(1), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */
+/* >  A(1:m-k+i-1,n-k+i), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqlf_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals: a_dim1/a_offset implement 1-based A(i,j) indexing, i__N are scratch */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo;
+    extern /* Subroutine */ int dgeql2_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer ib, nb, ki, kk;
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer mu, nu, nx;
+    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iws;
+/*  Blocked QL driver: column panels are factored with dgeql2_ and each panel's block reflector */
+/*  (dlarft_/dlarfb_) is applied to the columns on its left; the rest is one dgeql2_ call. */
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: shift the pointers so that a[i + j*a_dim1], tau[i], work[i] are 1-based */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1; /* *lwork == -1 requests a workspace-size query */
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+
+    if (*info == 0) {
+	k = f2cmin(*m,*n);
+	if (k == 0) {
+	    lwkopt = 1;
+	} else {
+	    nb = ilaenv_(&c__1, "DGEQLF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    lwkopt = *n * nb;
+	}
+	work[1] = (doublereal) lwkopt;
+/*        work[1] already holds lwkopt, so a workspace query can return right after the checks */
+	if (*lwork < f2cmax(1,*n) && ! lquery) {
+	    *info = -7;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQLF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible (nb is not set when k == 0 and is not used past this point) */
+
+    if (k == 0) {
+	return 0;
+    }
+/*     Defaults for the unblocked path; iws is the value stored in work[1] on exit */
+    nbmin = 2;
+    nx = 1;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQLF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQLF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially. */
+/*        The last kk columns are handled by the block method. */
+
+	ki = (k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = k, i__2 = ki + nb;
+	kk = f2cmin(i__1,i__2);
+/*        i__ runs from k-kk+ki+1 down to k-kk+1 in steps of nb (i__2 is negative) */
+	i__1 = k - kk + 1;
+	i__2 = -nb;
+	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
+		+= i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the QL factorization of the current block */
+/*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */
+/*           (iinfo receives dgeql2_'s status and is not examined) */
+	    i__3 = *m - k + i__ + ib - 1;
+	    dgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[
+		    i__], &work[1], &iinfo);
+	    if (*n - k + i__ > 1) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i+ib-1) . . . H(i+1) H(i) */
+/*              (the factor is written to work[1..] with leading dimension ldwork) */
+		i__3 = *m - k + i__ + ib - 1;
+		dlarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + 
+			i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
+/*              (work[ib+1..] is passed as dlarfb_'s scratch array) */
+		i__3 = *m - k + i__ + ib - 1;
+		i__4 = *n - k + i__ - 1;
+		dlarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, 
+			&i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &
+			work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
+			ldwork);
+	    }
+/* L10: */
+	}
+	mu = *m - k + i__ + nb - 1; /* i__ is now nb below the last panel start, so mu equals *m - kk */
+	nu = *n - k + i__ + nb - 1; /* likewise nu equals *n - kk: columns not yet factored */
+    } else {
+	mu = *m;
+	nu = *n;
+    }
+
+/*     Use unblocked code to factor the last or only block */
+
+    if (mu > 0 && nu > 0) {
+	dgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    }
+/*     Report the workspace size that was considered (iws) in work[1] */
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DGEQLF */
+
+} /* dgeqlf_ */
+
diff --git a/lapack-netlib/SRC/dgeqp3.c b/lapack-netlib/SRC/dgeqp3.c
new file mode 100644
index 000000000..98279844e
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqp3.c
@@ -0,0 +1,795 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))	/* parenthesised: a braced `{ceil(w)}` cannot be used as a value in an assignment */
+#define myhuge(w) (HUGE_VAL)	/* the argument is ignored; always expands to HUGE_VAL */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))	/* yields dmaxloc_'s result as a value; s and e arrive as pointers */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n for a float base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* x**n for a double base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* x**n for a float-complex base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* x**n for a double-complex base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* Integer x**n; for negative n the result is 1/x**|n| truncated to an integer. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0) {
+		/* Only x == -1 survives truncation; x == 0 deliberately still divides by zero. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+		bits = 0UL - (unsigned long int)n;
+	} else bits = (unsigned long int)n;
+	for (;;) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) == 0) break;	/* no squaring after the last exponent bit */
+		x *= x;
+	}
+	return acc;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)	/* strict '>' keeps the earliest index on ties; n is not used */
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)	/* strict '>' keeps the earliest index on ties; n is not used */
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum over n elements of conj(x) * y, with strides *incx_ and *incy_. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product: *z = sum over n elements of conj(x) * y, with strides *incx_ and *incy_. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product: *z = sum over n elements of x * y, with strides *incx_ and *incy_. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product: *z = sum over n elements of x * y, with strides *incx_ and *incy_. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b DGEQP3 */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQP3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqp3.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqp3.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqp3.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       INTEGER            JPVT( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQP3 computes a QR factorization with column pivoting of a */
+/* > matrix A:  A*P = Q*R  using Level 3 BLAS. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A. M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the upper triangle of the array contains the */
+/* >          f2cmin(M,N)-by-N upper trapezoidal matrix R; the elements below */
+/* >          the diagonal, together with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of f2cmin(M,N) elementary */
+/* >          reflectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A. LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] JPVT */
+/* > \verbatim */
+/* >          JPVT is INTEGER array, dimension (N) */
+/* >          On entry, if JPVT(J).ne.0, the J-th column of A is permuted */
+/* >          to the front of A*P (a leading column); if JPVT(J)=0, */
+/* >          the J-th column of A is a free column. */
+/* >          On exit, if JPVT(J)=K, then the J-th column of A*P was the */
+/* >          K-th column of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO=0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= 3*N+1. */
+/* >          For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */
+/* >          is the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit. */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real/complex vector */
+/* >  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */
+/* >  A(i+1:m,i), and tau in TAU(i). */
+/* > \endverbatim */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
+/* >    X. Sun, Computer Science Dept., Duke University, USA */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqp3_(integer *m, integer *n, doublereal *a, integer *
+	lda, integer *jpvt, doublereal *tau, doublereal *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer nfxd;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    integer j, nbmin, minmn;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer minws;
+    extern /* Subroutine */ int dlaqp2_(integer *, integer *, integer *, 
+	    doublereal *, integer *, integer *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *);
+    integer jb, na, nb, sm, sn, nx;
+    extern /* Subroutine */ int dgeqrf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dlaqps_(integer *, integer *, integer *, 
+	    integer *, integer *, doublereal *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, doublereal *, 
+	    doublereal *, integer *);
+    integer topbmn, sminmn;
+    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *);
+    integer lwkopt;
+    logical lquery;
+    integer fjb, iws;
+/*  Driver flow: validate the arguments, move caller-fixed columns to the front and factor */
+/*  them with dgeqrf_/dormqr_, then factor the free columns with dlaqps_ and dlaqp2_. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test input arguments */
+/*  ==================== */
+
+    /* Parameter adjustments: bias each pointer so the 1-based indices used below work */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --jpvt;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;	/* LWORK = -1 asks only for the workspace size */
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+
+    if (*info == 0) {
+	minmn = f2cmin(*m,*n);
+	if (minmn == 0) {
+	    iws = 1;
+	    lwkopt = 1;
+	} else {
+	    iws = *n * 3 + 1;	/* minimum workspace: 3*N+1 */
+	    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    lwkopt = (*n << 1) + (*n + 1) * nb;	/* optimal workspace: 2*N + (N+1)*NB */
+	}
+	work[1] = (doublereal) lwkopt;
+
+	if (*lwork < iws && ! lquery) {
+	    *info = -8;	/* LWORK is argument 8 */
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQP3", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;	/* query only: work[1] already holds lwkopt */
+    }
+
+/*     Move initial columns up front: every column with jpvt[j] != 0 is swapped forward. */
+
+    nfxd = 1;	/* next front position for a column with jpvt[j] != 0 */
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	if (jpvt[j] != 0) {
+	    if (j != nfxd) {
+		dswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], &
+			c__1);
+		jpvt[j] = jpvt[nfxd];	/* mirror the column swap in the permutation record */
+		jpvt[nfxd] = j;
+	    } else {
+		jpvt[j] = j;
+	    }
+	    ++nfxd;
+	} else {
+	    jpvt[j] = j;
+	}
+/* L10: */
+    }
+    --nfxd;	/* now the number of columns moved to the front */
+
+/*     Factorize fixed columns */
+/*  ======================= */
+
+/*     Compute the QR factorization of fixed columns and update */
+/*     remaining columns. */
+
+    if (nfxd > 0) {
+	na = f2cmin(*m,nfxd);
+/* CC      CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */
+	dgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info);
+/* Computing MAX: fold the value dgeqrf_ left in work[1] into iws (presumably its workspace size - confirm) */
+	i__1 = iws, i__2 = (integer) work[1];
+	iws = f2cmax(i__1,i__2);
+	if (na < *n) {
+/* CC         CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */
+/* CC  $                   TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */
+	    i__1 = *n - na;
+	    dormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, &
+		    tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, 
+		    info);
+/* Computing MAX: same bookkeeping for the value dormqr_ left in work[1] */
+	    i__1 = iws, i__2 = (integer) work[1];
+	    iws = f2cmax(i__1,i__2);
+	}
+    }
+
+/*     Factorize free columns */
+/*  ====================== */
+
+    if (nfxd < minmn) {
+/*        Sizes of the part that remains once the nfxd fixed columns are excluded. */
+	sm = *m - nfxd;
+	sn = *n - nfxd;
+	sminmn = minmn - nfxd;
+
+/*        Determine the block size. */
+
+	nb = ilaenv_(&c__1, "DGEQRF", " ", &sm, &sn, &c_n1, &c_n1, (ftnlen)6, 
+		(ftnlen)1);
+	nbmin = 2;
+	nx = 0;
+
+	if (nb > 1 && nb < sminmn) {
+
+/*           Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	    i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", &sm, &sn, &c_n1, &
+		    c_n1, (ftnlen)6, (ftnlen)1);
+	    nx = f2cmax(i__1,i__2);
+
+
+	    if (nx < sminmn) {
+
+/*              Determine if workspace is large enough for blocked code. */
+
+		minws = (sn << 1) + (sn + 1) * nb;	/* 2*SN + (SN+1)*NB */
+		iws = f2cmax(iws,minws);
+		if (*lwork < minws) {
+
+/*                 Not enough workspace to use optimal NB: Reduce NB and */
+/*                 determine the minimum value of NB. */
+
+		    nb = (*lwork - (sn << 1)) / (sn + 1);	/* solve 2*SN + (SN+1)*NB <= LWORK for NB */
+/* Computing MAX */
+		    i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", &sm, &sn, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		    nbmin = f2cmax(i__1,i__2);
+
+
+		}
+	    }
+	}
+
+/*        Initialize partial column norms. The first N elements of work */
+/*        store the exact column norms. */
+
+	i__1 = *n;
+	for (j = nfxd + 1; j <= i__1; ++j) {
+	    work[j] = dnrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1);
+	    work[*n + j] = work[j];	/* second copy of the norm in work[N+1 .. 2N] */
+/* L20: */
+	}
+
+	if (nb >= nbmin && nb < sminmn && nx < sminmn) {
+
+/*           Use blocked code initially. */
+
+	    j = nfxd + 1;
+
+/*           Compute factorization: while loop. */
+
+/*           The label/goto pair below is the translated while loop; it runs while j <= topbmn. */
+	    topbmn = minmn - nx;
+L30:
+	    if (j <= topbmn) {
+/* Computing MIN */
+		i__1 = nb, i__2 = topbmn - j + 1;
+		jb = f2cmin(i__1,i__2);
+
+/*              Factorize JB columns among columns J:N. */
+
+		i__1 = *n - j + 1;
+		i__2 = j - 1;
+		i__3 = *n - j + 1;
+		dlaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, &
+			jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n 
+			<< 1) + 1], &work[(*n << 1) + jb + 1], &i__3);
+
+		j += fjb;	/* advance by the count dlaqps_ reported (presumably columns actually factored - confirm) */
+		goto L30;
+	    }
+	} else {
+	    j = nfxd + 1;
+	}
+
+/*        Use unblocked code to factor the last or only block. */
+
+
+	if (j <= minmn) {
+	    i__1 = *n - j + 1;
+	    i__2 = j - 1;
+	    dlaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[
+		    j], &work[j], &work[*n + j], &work[(*n << 1) + 1]);
+	}
+
+    }
+
+    work[1] = (doublereal) iws;	/* hand the workspace size tracked in iws back to the caller */
+    return 0;
+
+/*     End of DGEQP3 */
+
+} /* dgeqp3_ */
+
diff --git a/lapack-netlib/SRC/dgeqr.c b/lapack-netlib/SRC/dgeqr.c
new file mode 100644
index 000000000..f47dd1844
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqr.c
@@ -0,0 +1,735 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {_Complex float c; memcpy(&c, z, sizeof c); return c;} /* bit-exact load of {r,i}: building r + i*_Complex_I gives a NaN real part when i is infinite and drops the sign of a -0.0 real part */
+static inline _Complex double Cd(doublecomplex *z) {_Complex double c; memcpy(&c, z, sizeof c); return c;} /* bit-exact load of {r,i}: building r + i*_Complex_I gives a NaN real part when i is infinite and drops the sign of a -0.0 real part */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))	/* parenthesised: a braced `{ceil(w)}` cannot be used as a value in an assignment */
+#define myhuge(w) (HUGE_VAL)	/* the argument is ignored; always expands to HUGE_VAL */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))	/* yields dmaxloc_'s result as a value; s and e arrive as pointers */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n for a float base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* x**n for a double base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* x**n for a float-complex base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* x**n for a double-complex base and an integer exponent of either sign, by square-and-multiply on |n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (bits != 0) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+/* Integer x**n; for negative n the result is 1/x**|n| truncated to an integer. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0) {
+		/* Only x == -1 survives truncation; x == 0 deliberately still divides by zero. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* Take |n| in unsigned arithmetic: negating n itself overflows for the most negative integer. */
+		bits = 0UL - (unsigned long int)n;
+	} else bits = (unsigned long int)n;
+	for (;;) {
+		if (bits & 1UL) acc *= x;
+		if ((bits >>= 1) == 0) break;	/* no squaring after the last exponent bit */
+		x *= x;
+	}
+	return acc;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)	/* strict '>' keeps the earliest index on ties; n is not used */
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k;
+	for (k = s + 1; k <= e; k++)	/* strict '>' keeps the earliest index on ties; n is not used */
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum over n elements of conj(x) * y, with strides *incx_ and *incy_. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product: *z = sum over n elements of conj(x) * y, with strides *incx_ and *incy_. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product: *z = sum over n elements of x * y, with strides *incx_ and *incy_. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product: *z = sum over n elements of x * y, with strides *incx_ and *incy_. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride starts at element (1-n)*inc so every access stays inside the vector. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+
+/* > \brief \b DGEQR */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, */
+/*                         INFO ) */
+
+/*       INTEGER           INFO, LDA, M, N, TSIZE, LWORK */
+/*       DOUBLE PRECISION  A( LDA, * ), T( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQR computes a QR factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a M-by-M orthogonal matrix; */
+/* >    R is an upper-triangular N-by-N matrix; */
+/* >    0 is a (M-N)-by-N zero matrix, if M > N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R */
+/* >          (R is upper triangular if M >= N); */
+/* >          the elements below the diagonal are used to store part of the */
+/* >          data structure to represent Q. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) */
+/* >          On exit, if INFO = 0, T(1) returns optimal (or either minimal */
+/* >          or optimal, if query is assumed) TSIZE. See TSIZE for details. */
+/* >          Remaining T contains part of the data structure used to represent Q. */
+/* >          If one wants to apply or construct Q, then one needs to keep T */
+/* >          (in addition to A) and pass it to further subroutines. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TSIZE */
+/* > \verbatim */
+/* >          TSIZE is INTEGER */
+/* >          If TSIZE >= 5, the dimension of the array T. */
+/* >          If TSIZE = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If TSIZE = -1, the routine calculates optimal size of T for the */
+/* >          optimum performance and returns this value in T(1). */
+/* >          If TSIZE = -2, the routine calculates minimal size of T and */
+/* >          returns this value in T(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
+/* >          or optimal, if query was assumed) LWORK. */
+/* >          See LWORK for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1 or -2, then a workspace query is assumed. The routine */
+/* >          only calculates the sizes of the T and WORK arrays, returns these */
+/* >          values as the first entries of the T and WORK arrays, and no error */
+/* >          message related to T or WORK is issued by XERBLA. */
+/* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
+/* >          optimal performance and returns this value in WORK(1). */
+/* >          If LWORK = -2, the routine calculates minimal size of WORK and */
+/* >          returns this value in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \par Further Details */
+/*  ==================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The goal of the interface is to give maximum freedom to the developers for */
+/* > creating any QR factorization algorithm they wish. The triangular */
+/* > (trapezoidal) R has to be stored in the upper part of A. The lower part of A */
+/* > and the array T can be used to store any relevant information for applying or */
+/* > constructing the Q factor. The WORK array can safely be discarded after exit. */
+/* > */
+/* > Caution: One should not expect the sizes of T and WORK to be the same from one */
+/* > LAPACK implementation to the other, or even from one execution to the other. */
+/* > A workspace query (for T and WORK) is needed at each execution. However, */
+/* > for a given execution, the size of T and WORK are fixed and will not change */
+/* > from one query to the next. */
+/* > */
+/* > \endverbatim */
+/* > */
+/* > \par Further Details particular to this LAPACK implementation: */
+/*  ============================================================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* > These details are particular for this LAPACK implementation. Users should not */
+/* > take them for granted. These details may change in the future, and are not likely */
+/* > true for another LAPACK implementation. These details are relevant if one wants */
+/* > to try to understand the code. They are not part of the interface. */
+/* > */
+/* > In this version, */
+/* > */
+/* >          T(2): row block size (MB) */
+/* >          T(3): column block size (NB) */
+/* >          T(6:TSIZE): data structure needed for Q, computed by */
+/* >                           DLATSQR or DGEQRT */
+/* > */
+/* >  Depending on the matrix dimensions M and N, and row and column */
+/* >  block sizes MB and NB returned by ILAENV, DGEQR will use either */
+/* >  DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute */
+/* >  the QR factorization. */
+/* > */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine: QR-factorize the M-by-N matrix A in place, storing the data that represents Q in T */ int dgeqr_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *t, integer *tsize, doublereal *work, integer *lwork, 
+	integer *info)
+{
+    /* System generated locals: column stride of A, its 1-based index shift, scratch for MAX() */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    logical mint, minw;	/* report the minimal rather than the optimal size in T(1) / WORK(1) */
+    integer mb, nb, nblcks;	/* row block size, column block size, number of row blocks */
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dgeqrt_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    logical lminws, lquery;	/* lminws: fell back to NB = 1 because T or WORK is below the preferred size; lquery: workspace query */
+    integer mintsz;	/* smallest TSIZE that is accepted (N + 5) */
+    extern /* Subroutine */ int dlatsqr_(integer *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *);
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: shift the pointers so that A, T and WORK are indexed from 1 */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --t;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/*     A value of -1 or -2 in TSIZE or LWORK turns the call into a workspace query */
+    lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2;
+/*     If either argument is -2, the minimal size is reported for each argument that is not -1 */
+    mint = FALSE_;
+    minw = FALSE_;
+    if (*tsize == -2 || *lwork == -2) {
+	if (*tsize != -1) {
+	    mint = TRUE_;
+	}
+	if (*lwork != -1) {
+	    minw = TRUE_;
+	}
+    }
+
+/*     Determine the block size (ILAENV is consulted only for a non-empty matrix). */
+/*     c__1, c__2 and c_n1 are file-scope constants declared above this routine (presumably 1, 2, -1; confirm). */
+    if (f2cmin(*m,*n) > 0) {
+	mb = ilaenv_(&c__1, "DGEQR ", " ", m, n, &c__1, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	nb = ilaenv_(&c__1, "DGEQR ", " ", m, n, &c__2, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+    } else {
+	mb = *m;
+	nb = 1;
+    }
+    if (mb > *m || mb <= *n) {	/* MB must lie in (N, M]; otherwise use one row block of M rows */
+	mb = *m;
+    }
+    if (nb > f2cmin(*m,*n) || nb < 1) {	/* NB must lie in [1, min(M,N)] */
+	nb = 1;
+    }
+    mintsz = *n + 5;	/* equals NB*N*NBLCKS + 5 with NB = 1 and NBLCKS = 1 */
+    if (mb > *n && *m > *n) {
+	if ((*m - *n) % (mb - *n) == 0) {	/* NBLCKS = ceiling((M-N)/(MB-N)) */
+	    nblcks = (*m - *n) / (mb - *n);
+	} else {
+	    nblcks = (*m - *n) / (mb - *n) + 1;
+	}
+    } else {
+	nblcks = 1;
+    }
+
+/*     Determine if the workspace size satisfies minimal size: when T or WORK is */
+/*     below the size for the chosen blocking but at least minimal, fall back to NB = 1 */
+    lminws = FALSE_;
+/* Computing MAX */
+    i__1 = 1, i__2 = nb * *n * nblcks + 5;
+    if ((*tsize < f2cmax(i__1,i__2) || *lwork < nb * *n) && *lwork >= *n && *
+	    tsize >= mintsz && ! lquery) {
+/* Computing MAX */
+	i__1 = 1, i__2 = nb * *n * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2)) {	/* T too small for the chosen blocking */
+	    lminws = TRUE_;
+	    nb = 1;
+	    mb = *m;
+	}
+	if (*lwork < nb * *n) {	/* WORK too small for the (possibly already reduced) NB */
+	    lminws = TRUE_;
+	    nb = 1;
+	}
+    }
+/*     INFO = -i identifies the first illegal argument: M (1), N (2), LDA (4), TSIZE (6), LWORK (8) */
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else /* the T and WORK sizes are checked last */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = nb * *n * nblcks + 5;
+	if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
+	    *info = -6;
+	} else /* T is acceptable; now check WORK */ {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n * nb;
+	    if (*lwork < f2cmax(i__1,i__2) && ! lquery && ! lminws) {
+		*info = -8;
+	    }
+	}
+    }
+
+    if (*info == 0) {	/* publish the sizes and block sizes; this is also the answer to a query */
+	if (mint) {
+	    t[1] = (doublereal) mintsz;
+	} else {
+	    t[1] = (doublereal) (nb * *n * nblcks + 5);
+	}
+	t[2] = (doublereal) mb;
+	t[3] = (doublereal) nb;
+	if (minw) {
+	    work[1] = (doublereal) f2cmax(1,*n);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = nb * *n;
+	    work[1] = (doublereal) f2cmax(i__1,i__2);
+	}
+    }
+    if (*info != 0) {	/* report the offending argument number (positive) and give up */
+	i__1 = -(*info);
+	xerbla_("DGEQR", &i__1, (ftnlen)5);
+	return 0;
+    } else if (lquery) {	/* query only: nothing is factorized */
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (f2cmin(*m,*n) == 0) {
+	return 0;
+    }
+
+/*     The QR Decomposition: DGEQRT unless row blocking applies (M > N and N < MB < M), */
+/*     in which case DLATSQR is used; the factor data starts at T(6) with leading dimension NB */
+    if (*m <= *n || mb <= *n || mb >= *m) {
+	dgeqrt_(m, n, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], info);
+    } else {
+	dlatsqr_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], 
+		lwork, info);
+    }
+/*     Store the WORK size for the blocking actually used in WORK(1) again */
+/* Computing MAX */
+    i__1 = 1, i__2 = nb * *n;
+    work[1] = (doublereal) f2cmax(i__1,i__2);
+
+    return 0;
+
+/*     End of DGEQR */
+
+} /* dgeqr_ */
+
diff --git a/lapack-netlib/SRC/dgeqr2.c b/lapack-netlib/SRC/dgeqr2.c
new file mode 100644
index 000000000..e5ed2b269
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqr2.c
@@ -0,0 +1,602 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* R = A**B; B is a doublecomplex pointer like A, so it is converted with Cd() */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Value-producing helpers: each expands to a parenthesised expression so it can be used as an operand. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* x**n by repeated squaring, negative n as (1/x)**|n|; |n| is formed unsigned because -n can overflow */
+	float result=1.0; unsigned long int u;
+	if(n < 0) u = 0UL - (unsigned long int)n, x = 1/x;
+	else u = (unsigned long int)n;
+	for(;;) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* x**n by repeated squaring, negative n as (1/x)**|n|; |n| is formed unsigned because -n can overflow */
+	double result=1.0; unsigned long int u;
+	if(n < 0) u = 0UL - (unsigned long int)n, x = 1/x;
+	else u = (unsigned long int)n;
+	for(;;) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* x**n by repeated squaring, negative n as (1/x)**|n|; |n| is formed unsigned because -n can overflow */
+	_Complex float result=1.0; unsigned long int u;
+	if(n < 0) u = 0UL - (unsigned long int)n, x = 1/x;
+	else u = (unsigned long int)n;
+	for(;;) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* x**n by repeated squaring, negative n as (1/x)**|n|; |n| is formed unsigned because -n can overflow */
+	_Complex double result=1.0; unsigned long int u;
+	if(n < 0) u = 0UL - (unsigned long int)n, x = 1/x;
+	else u = (unsigned long int)n;
+	for(;;) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {	/* integer power x**n */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;	/* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0;	/* negative n: 0 unless x == 0; NOTE(review): x == 0 divides by zero here, presumably on purpose -- confirm */
+		else n = -n;	/* x == -1: continue with |n| in the loop below */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {	/* i.e. n > 0, or x == -1 with a nonzero n */
+		u = n;
+		for(pow = 1; ; ) {	/* multiply in x for each set bit of u, squaring x between bits */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, relative to s, of the first largest entry among w(s..e); n is not used */
+	integer pos, best = s; double peak = w[s-1];
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > peak) { best = pos; peak = w[pos-1]; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, relative to s, of the first largest entry among w(s..e); n is not used */
+	integer pos, best = s; float peak = w[s-1];
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > peak) { best = pos; peak = w[pos-1]; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements with strides incx, incy. */
+	/* As in the reference BLAS, a vector with a negative stride is walked backwards from its */
+	/* far end, so no index before the start of the array is formed. */
+	/* n <= 0 leaves *z = 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements with strides incx, incy. */
+	/* As in the reference BLAS, a vector with a negative stride is walked backwards from its */
+	/* far end, so no index before the start of the array is formed. */
+	/* n <= 0 leaves *z = 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements with strides incx, incy. */
+	/* As in the reference BLAS, a vector with a negative stride is walked backwards from its */
+	/* far end, so no index before the start of the array is formed. */
+	/* n <= 0 leaves *z = 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotu;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements with strides incx, incy. */
+	/* As in the reference BLAS, a vector with a negative stride is walked backwards from its */
+	/* far end, so no index before the start of the array is formed. */
+	/* n <= 0 leaves *z = 0. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotu = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		zdotu += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotu;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+
+/* > \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorit
+hm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQR2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQR2 computes a QR factorization of a real m-by-n matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a m-by-m orthogonal matrix; */
+/* >    R is an upper-triangular n-by-n matrix; */
+/* >    0 is a (m-n)-by-n zero matrix, if m > n. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n); the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* Unblocked Householder QR of the m-by-n matrix A.  On return R sits on and */
+    /* above the diagonal of A, the reflector vectors sit below it, and TAU holds */
+    /* the scalar factor of each reflector.  WORK is scratch handed to DLARF. */
+    /* INFO = -i reports that the i-th argument is illegal.  Always returns 0. */
+    /* Leading dimension of A; the arrays are addressed zero-based below */
+    const integer ld = *lda;
+
+    /* Scratch integers passed by address to the callees */
+    integer rows, cols, below, errarg;
+    integer j, nrefl;
+    doublereal diag;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *), dlarfg_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+
+
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     November 2019 */
+
+
+/*  ===================================================================== */
+
+
+/*     Validate M, N and LDA; the first offender determines INFO */
+
+    *info = 0;
+    if (*m < 0)
+	*info = -1;
+    else if (*n < 0)
+	*info = -2;
+    else if (*lda < f2cmax(1,*m))
+	*info = -4;
+
+    if (*info != 0) {
+	errarg = -(*info);
+	xerbla_("DGEQR2", &errarg, (ftnlen)6);
+	return 0;
+    }
+
+/*     One reflector per column, min(M,N) in total */
+
+    nrefl = f2cmin(*m,*n);
+
+    for (j = 0; j < nrefl; ++j) {
+	doublereal *col = a + j * ld;
+
+/*        Generate the reflector that annihilates the entries below the */
+/*        diagonal of this column */
+
+	rows = *m - j;
+	below = f2cmin(j + 2,*m) - 1;
+	dlarfg_(&rows, &col[j], &col[below], &c__1, &tau[j]);
+
+	if (j + 1 >= *n) {
+	    continue;
+	}
+
+/*        Apply the reflector from the left to the columns to the right; the */
+/*        diagonal entry holds the vector's leading 1 for the duration */
+
+	diag = col[j];
+	col[j] = 1.;
+	rows = *m - j;
+	cols = *n - j - 1;
+	dlarf_("Left", &rows, &cols, &col[j], &c__1, &tau[j], &col[j + ld],
+		lda, work);
+	col[j] = diag;
+    }
+    return 0;
+
+/*     End of DGEQR2 */
+
+} /* dgeqr2_ */
+
diff --git a/lapack-netlib/SRC/dgeqr2p.c b/lapack-netlib/SRC/dgeqr2p.c
new file mode 100644
index 000000000..1bd3a5c8c
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqr2p.c
@@ -0,0 +1,607 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* R = A**B; B is a doublecomplex pointer like A, so it is converted with Cd() */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Value-producing helpers: each expands to a parenthesised expression so it can be used as an operand. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* x**n by repeated squaring, negative n as (1/x)**|n|; |n| is formed unsigned because -n can overflow */
+	float result=1.0; unsigned long int u;
+	if(n < 0) u = 0UL - (unsigned long int)n, x = 1/x;
+	else u = (unsigned long int)n;
+	for(;;) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* x**n by repeated squaring, negative n as (1/x)**|n|; |n| is formed unsigned because -n can overflow */
+	double result=1.0; unsigned long int u;
+	if(n < 0) u = 0UL - (unsigned long int)n, x = 1/x;
+	else u = (unsigned long int)n;
+	for(;;) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* x**n by repeated squaring, negative n as (1/x)**|n|; |n| is formed unsigned because -n can overflow */
+	_Complex float result=1.0; unsigned long int u;
+	if(n < 0) u = 0UL - (unsigned long int)n, x = 1/x;
+	else u = (unsigned long int)n;
+	for(;;) {
+		if(u & 01) result *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {	/* x**n via square-and-multiply */
+	_Complex double acc = 1.0;
+	unsigned long int bits;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base first */
+	bits = n;
+	while (bits != 0) {
+		if (bits & 01) acc *= x;	/* low bit set: fold the current power in */
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {	/* integer x**n, truncating */
+	integer acc; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* other bases give 0 here; x == 0 still performs the 1/x division */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x == -1: continue with the positive exponent */
+	}
+	bits = n;
+	for (acc = 1; ; ) {
+		if (bits & 01) acc *= x;	/* low bit set: fold the current power in */
+		bits >>= 1;
+		if (bits == 0) break;
+		x *= x;	/* next power of two of the base */
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first maximum of w(s:e); n is unused */
+	integer best = s, idx = s; double top = w[s-1];
+	while (++idx <= e)
+		if (w[idx-1] > top) { best = idx; top = w[idx-1]; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first maximum of w(s:e); n is unused */
+	integer best = s, idx = s; float top = w[s-1];
+	while (++idx <= e)
+		if (w[idx-1] > top) { best = idx; top = w[idx-1]; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+/* c__1 supplies an addressable integer 1 for the by-reference calls below */
+static integer c__1 = 1;
+
+/* > \brief \b DGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagonal elements using an unblocked algorithm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQR2P + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqr2p.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqr2p.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqr2p.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQR2P computes a QR factorization of a real m-by-n matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is an m-by-m orthogonal matrix; */
+/* >    R is an upper-triangular n-by-n matrix with nonnegative diagonal */
+/* >    entries; */
+/* >    0 is a (m-n)-by-n zero matrix, if m > n. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the min(m,n) by n upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n). The diagonal entries of R are */
+/* >          nonnegative; the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of elementary reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > */
+/* > See Lapack Working Note 203 for details */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqr2p_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *info)
+{
+/*  Purpose */
+/*  ======= */
+/*  Unblocked QR factorization of the m-by-n matrix held column-major in */
+/*  a with leading dimension *lda. For each of the first f2cmin(m,n) */
+/*  columns, dlarfgp_ generates an elementary reflector whose scalar */
+/*  factor goes to tau, and dlarf_ applies it to the columns on its right */
+/*  using work as scratch space. */
+/*  The return value is always 0; *info is 0 on success or -i when the */
+/*  i-th argument is illegal (reported through xerbla_ first). */
+/*  NOTE(review): the nonnegative diagonal of R presumably comes from */
+/*  dlarfgp_ itself -- confirm against that routine. */
+
+    /* External routines */
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *), xerbla_(char *, integer *, ftnlen);
+    extern /* Subroutine */ int dlarfgp_(integer *, doublereal *, doublereal *
+	    , integer *, doublereal *);
+
+    /* Local variables; the row and column indices below are zero-based */
+    integer ld = *lda;		/* leading dimension of a */
+    integer nrefl;		/* number of reflectors, f2cmin(m,n) */
+    integer j;			/* current column */
+    integer rows, cols;	/* extent of the block a reflector acts on */
+    integer below;		/* row of the first entry under the diagonal */
+    integer argpos;		/* position of the illegal argument */
+    doublereal *diag;		/* address of the diagonal entry of column j */
+    doublereal saved;		/* diagonal value parked while dlarf_ runs */
+
+/*     Test the input arguments; only the first violation is recorded */
+
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	argpos = -(*info);
+	xerbla_("DGEQR2P", &argpos, (ftnlen)7);
+	return 0;
+    }
+
+    nrefl = f2cmin(*m,*n);
+    for (j = 0; j < nrefl; ++j) {
+	diag = a + j + j * ld;
+
+/*        Generate the reflector for column j; it annihilates the entries */
+/*        below the diagonal. The second pointer is clamped to the last */
+/*        row so it stays inside the matrix when j is the final row. */
+
+	rows = *m - j;
+	below = f2cmin(j + 2,*m) - 1;
+	dlarfgp_(&rows, diag, a + below + j * ld, &c__1, &tau[j]);
+	if (j + 1 >= *n) {
+	    continue;
+	}
+
+/*        Apply the reflector from the left to the trailing columns. The */
+/*        diagonal entry is set to 1 for the duration of the call and */
+/*        restored afterwards. */
+
+	saved = *diag;
+	*diag = 1.;
+	rows = *m - j;
+	cols = *n - j - 1;
+	dlarf_("Left", &rows, &cols, diag, &c__1, &tau[j], diag + ld, lda, 
+		work);
+	*diag = saved;
+    }
+    return 0;
+
+/*     End of DGEQR2P */
+
+} /* dgeqr2p_ */
+
diff --git a/lapack-netlib/SRC/dgeqrf.c b/lapack-netlib/SRC/dgeqrf.c
new file mode 100644
index 000000000..84ab43763
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqrf.c
@@ -0,0 +1,702 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* every Fortran INTEGER in the translated code below */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex; /* real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex; /* real part r, imaginary part i */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C complex number */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C complex number */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same address, viewed as _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same address, viewed as _Complex double */
+#define pCf(z) (*_pCf(z)) /* assignable _Complex float view of *z */
+#define pCd(z) (*_pCd(z)) /* assignable _Complex double view of *z */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1) /* truth values assigned to logical variables */
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain extern unless already defined */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: scalar types used by the descriptor structs and string lengths */
+
+typedef int flag;
+typedef int ftnlen; /* also the type of the hidden string-length arguments (see the (ftnlen) casts) */
+typedef int ftnint;
+
+/* cilist: parameters of an external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: parameters of an internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: parameters of an open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: parameters of a close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: parameters of a rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: parameters of an inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+/* Multitype overlays one value of each scalar type declared in this header */
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j;  -- member disabled: no longint type is declared here */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;	/* array of pointers to variable descriptors */
+	int nvars;	/* presumably the number of entries in vars -- TODO confirm */
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* evaluates x twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* evaluates the selected argument twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* evaluates the selected argument twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Intrinsic helpers implemented as macros over <math.h> and <complex.h>; most take pointer arguments */
+#define abort_() { sig_die("Fortran abort routine called", 1); } /* sig_die ignores the message and exits with status 1 */
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* NOTE(review): uses conj, not conjf, on a float complex value */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) /* positive difference, 0.0 otherwise */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* NOTE(review): uses cimag, not cimagf, on a float complex value */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* natural log scaled by the constant, i.e. log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* rounds halves away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| with the sign of b; b == 0 counts as positive */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n) /* the string pointer s is ignored */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is dereferenced but not converted with Cd() like A - confirm */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; } /* joins *np pieces rpp[i] of length rnp[i] into lpp, truncating at llp bytes and blank-filling the rest */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) bytes */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) bytes, stops at a NUL in B, writes no padding */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* version tag string; not referenced in the visible code */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break; /* expands to a bare statement, so it is only valid inside a loop or switch */
+#define mycycle() continue; /* expands to a bare statement, so it is only valid inside a loop */
+#define myceiling(w) {ceil(w)} /* NOTE(review): the braces keep this from being used as an expression */
+#define myhuge(w) {HUGE_VAL} /* w is ignored; same brace caveat as myceiling */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* always calls the double-precision search, whatever w points to */
+
+/* procedure parameter types for -A and -C++: L_fp is a pointer to a function */
+/* returning logical; neither branch below fixes the parameter list. */
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {	/* x**n via square-and-multiply */
+	float acc = 1.0;
+	unsigned long int bits;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base first */
+	bits = n;
+	while (bits != 0) {
+		if (bits & 01) acc *= x;	/* low bit set: fold the current power in */
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+static double dpow_ui(double x, integer n) {	/* x**n via square-and-multiply */
+	double acc = 1.0;
+	unsigned long int bits;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base first */
+	bits = n;
+	while (bits != 0) {
+		if (bits & 01) acc *= x;	/* low bit set: fold the current power in */
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {	/* x**n via square-and-multiply */
+	_Complex float acc = 1.0;
+	unsigned long int bits;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base first */
+	bits = n;
+	while (bits != 0) {
+		if (bits & 01) acc *= x;	/* low bit set: fold the current power in */
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {	/* x**n via square-and-multiply */
+	_Complex double acc = 1.0;
+	unsigned long int bits;
+	if (n < 0) { n = -n; x = 1/x; }	/* negative exponent: invert the base first */
+	bits = n;
+	while (bits != 0) {
+		if (bits & 01) acc *= x;	/* low bit set: fold the current power in */
+		bits >>= 1;
+		if (bits != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {	/* integer x**n, truncating */
+	integer acc; unsigned long int bits;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* other bases give 0 here; x == 0 still performs the 1/x division */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x == -1: continue with the positive exponent */
+	}
+	bits = n;
+	for (acc = 1; ; ) {
+		if (bits & 01) acc *= x;	/* low bit set: fold the current power in */
+		bits >>= 1;
+		if (bits == 0) break;
+		x *= x;	/* next power of two of the base */
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first maximum of w(s:e); n is unused */
+	integer best = s, idx = s; double top = w[s-1];
+	while (++idx <= e)
+		if (w[idx-1] > top) { best = idx; top = w[idx-1]; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first maximum of w(s:e); n is unused */
+	integer best = s, idx = s; float top = w[s-1];
+	while (++idx <= e)
+		if (w[idx-1] > top) { best = idx; top = w[idx-1]; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conjg(x(i)) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n strided elements. */
+	/* A negative increment traverses its vector backwards, starting at element */
+	/* (n-1)*|inc| as reference BLAS does, so every access stays inside the array */
+	/* instead of reaching in front of it. n <= 0 stores zero in *z. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+/* Addressable integer literals for the by-reference ilaenv_ arguments below */
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b DGEQRF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQRF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrf.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrf.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrf.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQRF computes a QR factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is an M-by-M orthogonal matrix; */
+/* >    R is an upper-triangular N-by-N matrix; */
+/* >    0 is a (M-N)-by-N zero matrix, if M > N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the min(M,N)-by-N upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n); the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of min(m,n) elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,N). */
+/* >          For optimum performance LWORK >= N*NB, where NB is */
+/* >          the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo; /* i__: first column of the current panel; k: f2cmin(m,n) */
+    extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer ib, nb; /* ib: width of the current panel; nb: block size obtained from ilaenv_ */
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer nx; /* crossover point: the last nx columns use unblocked code */
+    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork, lwkopt; /* ldwork: leading dimension used inside work; lwkopt: size reported to the caller */
+    logical lquery; /* true when *lwork == -1, i.e. a workspace query */
+    integer iws; /* workspace size the chosen code path needs */
+/*  Blocked QR factorization driver: panels of nb columns are factored by */
+/*  dgeqr2_ and applied to the trailing columns through dlarft_/dlarfb_; */
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     November 2019 */
+/*  the remaining columns (or the whole matrix) go through dgeqr2_ alone. */
+/*  Always returns 0; *info is 0 on success, -i for an illegal i-th argument. */
+/*  ===================================================================== */
+/*  With *lwork == -1 only the workspace size is written to WORK(1) and the */
+/*  matrix is left untouched. */
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = f2cmax(1,*n * nb); /* never below the minimum LWORK of max(1,N); plain n*nb was 0 for N = 0 */
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQRF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the QR factorization of the current block */
+/*           A(i:m,i:i+ib-1) */
+
+	    i__3 = *m - i__ + 1;
+	    dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
+
+		i__3 = *m - i__ + 1;
+		dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H**T to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
+			+ 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DGEQRF */
+
+} /* dgeqrf_ */
+
diff --git a/lapack-netlib/SRC/dgeqrfp.c b/lapack-netlib/SRC/dgeqrfp.c
new file mode 100644
index 000000000..9cf4ef1ee
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqrfp.c
@@ -0,0 +1,705 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Value macros expand to parenthesized expressions; a braced form is only valid as an initializer. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	float result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	double result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	_Complex float result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	_Complex double result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; a negative n yields the truncated reciprocal power. */
+	integer result = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* |x| > 1 gives 0; x == 0 keeps the original's trapping 1/x. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+	}
+	/* Only n > 0, or x == -1 with n < 0, reaches here.  |n| is formed
+	   unsigned because "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based offset, counted from s, of the first largest entry in w(s..e); n is not read. */
+	integer best = s, pos;
+	for (pos = s + 1; pos <= e; pos++)
+		if (w[pos-1] > w[best-1]) best = pos;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based offset, counted from s, of the first largest entry in w(s..e); n is not read. */
+	integer best = s, pos;
+	for (pos = s + 1; pos <= e; pos++)
+		if (w[pos-1] > w[best-1]) best = pos;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores in *z the conjugated dot product sum(conj(x(i)) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex float acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores in *z the conjugated dot product sum(conj(x(i)) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex double acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores in *z the unconjugated dot product sum(x(i) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex float acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores in *z the unconjugated dot product sum(x(i) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex double acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+
+/* > \brief \b DGEQRFP */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQRFP + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrfp
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrfp
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrfp
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQRFP computes a QR factorization of a real M-by-N matrix A: */
+/* > */
+/* >    A = Q * ( R ), */
+/* >            ( 0 ) */
+/* > */
+/* > where: */
+/* > */
+/* >    Q is a M-by-M orthogonal matrix; */
+/* >    R is an upper-triangular N-by-N matrix with nonnegative diagonal */
+/* >    entries; */
+/* >    0 is a (M-N)-by-N zero matrix, if M > N. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */
+/* >          upper triangular if m >= n). The diagonal entries of R */
+/* >          are nonnegative; the elements below the diagonal, */
+/* >          with the array TAU, represent the orthogonal matrix Q as a */
+/* >          product of f2cmin(m,n) elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (f2cmin(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= f2cmax(1,N). */
+/* >          For optimum performance LWORK >= N*NB, where NB is */
+/* >          the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2019 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */
+/* >  and tau in TAU(i). */
+/* > */
+/* > See Lapack Working Note 203 for details */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqrfp_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo, ib, nb;
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer nx;
+    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    extern /* Subroutine */ int dgeqr2p_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer iws;
+/*  Blocked QR factorization A = Q*R, built on DGEQR2P panel factorizations. */
+/*  WORK(1) reports the workspace size on exit and for an LWORK = -1 query. */
+/*  -- LAPACK computational routine (version 3.9.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2019 */
+/*  INFO = -i flags the i-th argument as invalid (reported through XERBLA); */
+/*  a workspace query (LWORK = -1) returns before A or TAU are touched. */
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/*     Clamp to 1: LWORK = 0 is rejected below, so a query must never return 0 (N = 0). */
+    lwkopt = f2cmax(1,*n * nb);
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQRFP", &i__1, (ftnlen)7);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;	/* loop step; positive here because nb >= nbmin >= 2 */
+	for (i__ = 1; i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the QR factorization of the current block */
+/*           A(i:m,i:i+ib-1) */
+
+	    i__3 = *m - i__ + 1;
+	    dgeqr2p_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i) H(i+1) . . . H(i+ib-1) */
+
+		i__3 = *m - i__ + 1;
+		dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * 
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H**T to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib 
+			+ 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	dgeqr2p_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		1], &iinfo);
+    }
+
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DGEQRFP */
+
+} /* dgeqrfp_ */
+
diff --git a/lapack-netlib/SRC/dgeqrt.c b/lapack-netlib/SRC/dgeqrt.c
new file mode 100644
index 000000000..5568853eb
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqrt.c
@@ -0,0 +1,631 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Value macros expand to parenthesized expressions; a braced form is only valid as an initializer. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	float result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	double result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	_Complex float result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Returns x**n by binary exponentiation (square-and-multiply). */
+	_Complex double result = 1.0; unsigned long int u;
+	/* |n| is formed unsigned: "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if(n < 0) x = 1/x;	/* negative exponent: raise the reciprocal */
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; a negative n yields the truncated reciprocal power. */
+	integer result = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* |x| > 1 gives 0; x == 0 keeps the original's trapping 1/x. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+	}
+	/* Only n > 0, or x == -1 with n < 0, reaches here.  |n| is formed
+	   unsigned because "n = -n" overflows for the most negative n. */
+	u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	while(u != 0) {
+		if(u & 01) result *= x;	/* this bit of the exponent is set */
+		if(u >>= 1) x *= x;	/* square only while bits remain */
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based offset, counted from s, of the first largest entry in w(s..e); n is not read. */
+	integer best = s, pos;
+	for (pos = s + 1; pos <= e; pos++)
+		if (w[pos-1] > w[best-1]) best = pos;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based offset, counted from s, of the first largest entry in w(s..e); n is not read. */
+	integer best = s, pos;
+	for (pos = s + 1; pos <= e; pos++)
+		if (w[pos-1] > w[best-1]) best = pos;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores in *z the conjugated dot product sum(conj(x(i)) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex float acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores in *z the conjugated dot product sum(conj(x(i)) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex double acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores in *z the unconjugated dot product sum(x(i) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex float acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores in *z the unconjugated dot product sum(x(i) * y(i)) over n elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix, iy;
+	_Complex double acc = 0.0;
+	/* A negative increment walks its vector backwards from the far end, as in
+	   the reference BLAS; indexing from element 0 would read before the array. */
+	ix = incx < 0 ? (1 - n) * incx : 0;
+	iy = incy < 0 ? (1 - n) * incy : 0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGEQRT */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQRT + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrt.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrt.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrt.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) */
+
+/*       INTEGER INFO, LDA, LDT, M, N, NB */
+/*       DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQRT computes a blocked QR factorization of a real M-by-N matrix A */
+/* > using the compact WY representation of Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NB */
+/* > \verbatim */
+/* >          NB is INTEGER */
+/* >          The block size to be used in the blocked QR.  MIN(M,N) >= NB >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the elements on and above the diagonal of the array */
+/* >          contain the min(M,N)-by-N upper trapezoidal matrix R (R is */
+/* >          upper triangular if M >= N); the elements below the diagonal */
+/* >          are the columns of V. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) */
+/* >          The upper triangular block reflectors stored in compact form */
+/* >          as a sequence of upper triangular blocks.  See below */
+/* >          for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= NB. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (NB*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
+/* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1       ) */
+/* >                   ( v1  1    ) */
+/* >                   ( v1 v2  1 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A. */
+/* > */
+/* >  Let K=MIN(M,N).  The number of blocks is B = ceiling(K/NB), where each */
+/* >  block is of order NB except for the last block, which is of order */
+/* >  IB = K - (B-1)*NB.  For each of the B blocks, an upper triangular block */
+/* >  reflector factor is computed: T1, T2, ..., TB.  The NB-by-NB (and IB-by-IB */
+/* >  for the last block) T's are stored in the NB-by-K matrix T as */
+/* > */
+/* >               T = (T1 T2 ... TB). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqrt_(integer *m, integer *n, integer *nb, doublereal *
+	a, integer *lda, doublereal *t, integer *ldt, doublereal *work, 
+	integer *info)
+{
+    /* System generated locals: array strides/offsets and by-reference temporaries */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables: i__ = first column of the current panel, ib = its width, k = min(m,n) */
+    integer i__, k, iinfo, ib;
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), xerbla_(char *, 
+	    integer *, ftnlen), dgeqrt2_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), dgeqrt3_(integer *
+	    , integer *, doublereal *, integer *, doublereal *, integer *, 
+	    integer *);
+
+/*  Outline: validate arguments, then for each panel call dgeqrt3_ and apply dlarfb_ to the columns right of it. */
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/* ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: shift the base pointers so the 1-based indices a[i + j*a_dim1], t[i + j*t_dim1], work[i] can be used below */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nb < 1 || *nb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) { /* && binds tighter than ||: NB > min(M,N) is rejected only when min(M,N) > 0 */
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldt < *nb) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info); /* xerbla_ receives the positive position of the offending argument */
+	xerbla_("DGEQRT", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = f2cmin(*m,*n);
+    if (k == 0) {
+	return 0;
+    }
+
+/*     Blocked loop of length K */
+/*     (i__ advances by NB; NB >= 1 was checked above, so only the i__ <= k form of the test below applies) */
+    i__1 = k;
+    i__2 = *nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	i__3 = k - i__ + 1;
+	ib = f2cmin(i__3,*nb); /* the last panel may be narrower than NB */
+
+/*     Compute the QR factorization of the current block A(I:M,I:I+IB-1) */
+/*     The panel's triangular factor is written to T(1:IB,I:I+IB-1); iinfo is not examined afterwards. */
+	if (TRUE_) { /* constant condition; NOTE(review): with TRUE_ defined as (1) the dgeqrt2_ branch is never taken */
+	    i__3 = *m - i__ + 1;
+	    dgeqrt3_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 
+		    + 1], ldt, &iinfo);
+	} else {
+	    i__3 = *m - i__ + 1;
+	    dgeqrt2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 
+		    + 1], ldt, &iinfo);
+	}
+	if (i__ + ib <= *n) { /* columns remain to the right of this panel */
+
+/*     Update by applying H**T to A(I:M,I+IB:N) from the left */
+
+	    i__3 = *m - i__ + 1;
+	    i__4 = *n - i__ - ib + 1;
+	    i__5 = *n - i__ - ib + 1; /* same value as i__4, passed again as the last dlarfb_ argument (after work) */
+	    dlarfb_("L", "T", "F", "C", &i__3, &i__4, &ib, &a[i__ + i__ * 
+		    a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + (i__ + 
+		    ib) * a_dim1], lda, &work[1], &i__5);
+	}
+    }
+    return 0;
+
+/*     End of DGEQRT */
+
+} /* dgeqrt_ */
+
diff --git a/lapack-netlib/SRC/dgeqrt2.c b/lapack-netlib/SRC/dgeqrt2.c
new file mode 100644
index 000000000..8165f9bf4
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqrt2.c
@@ -0,0 +1,648 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex; /* single-precision pair: r = real part, i = imaginary part (see Cf) */
+typedef struct { doublereal r, i; } doublecomplex; /* double-precision pair, same field meaning (see Cd) */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* pointer cast only; NOTE(review): assumes {r,i} is laid out like _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* pointer cast only; same layout assumption for double */
+#define pCf(z) (*_pCf(z)) /* lvalue through the cast pointer, e.g. pCf(z) = value; */
+#define pCd(z) (*_pCd(z)) /* double-precision counterpart of pCf */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* macro form; x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* one argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* one argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* uses the double-precision ccos, unlike the *f calls below */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* goes through the double-precision conj, then narrows on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* result has type double (cimag, not cimagf) */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncation toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* round half away from zero; __x is evaluated twice */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| when b >= 0, otherwise -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is passed as *(B), not Cd(B) like A -- confirm against the intended argument type */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars, stops at a NUL in B, adds no padding */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a braced expression with no ';' -- not usable as a value or a statement as written */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): same braced-expression form as myceiling */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): same braced-expression form; always selects the double-precision dmaxloc_ */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x**n for integer n by repeated squaring */
+	float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x; /* negative exponent: raise the reciprocal instead */
+		for(u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n; ; ) { /* u = |n|, formed unsigned so negating the most negative n cannot overflow */
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x in */
+			if(u >>= 1) x *= x; /* more bits remain: square for the next one */
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* returns x**n for integer n by repeated squaring */
+	double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x; /* negative exponent: raise the reciprocal instead */
+		for(u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n; ; ) { /* u = |n|, formed unsigned so negating the most negative n cannot overflow */
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x in */
+			if(u >>= 1) x *= x; /* more bits remain: square for the next one */
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns x**n for integer n by repeated squaring */
+	_Complex float pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x; /* negative exponent: raise the reciprocal instead */
+		for(u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n; ; ) { /* u = |n|, formed unsigned so negating the most negative n cannot overflow */
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x in */
+			if(u >>= 1) x *= x; /* more bits remain: square for the next one */
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns x**n for integer n by repeated squaring */
+	_Complex double pow=1.0; unsigned long int u;
+	if(n != 0) {
+		if(n < 0) x = 1/x; /* negative exponent: raise the reciprocal instead */
+		for(u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n; ; ) { /* u = |n|, formed unsigned so negating the most negative n cannot overflow */
+			if(u & 01) pow *= x; /* current low bit set: fold this power of x in */
+			if(u >>= 1) x *= x; /* more bits remain: square for the next one */
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* integer power x**n with an integer result */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* zero exponent or base 1: result is 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* n < 0, x not 1 or -1: sets 0, except x == 0 where 1/x is evaluated (integer division by zero) */
+		else n = -n; /* x == -1 with n < 0: continue below with the positive exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* the negated term holds only for x == -1 with n != 0 */
+		u = n;
+		for(pow = 1; ; ) { /* repeated squaring over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at w(s), of the first largest entry among w(s..e); n is not used. */
+	integer best = s, pos; double top = w[s-1]; /* w(s) is the initial candidate (read even when e < s) */
+	for (pos = s + 1; pos <= e; ++pos) /* strict '>' keeps the earliest of equal maxima */
+		if (w[pos-1] > top) { best = pos; top = w[pos-1]; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at w(s), of the first largest entry among w(s..e); n is not used. */
+	integer best = s, pos; float top = w[s-1]; /* w(s) is the initial candidate (read even when e < s) */
+	for (pos = s + 1; pos <= e; ++pos) /* strict '>' keeps the earliest of equal maxima */
+		if (w[pos-1] > top) { best = pos; top = w[pos-1]; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum over i of conjg(x(i))*y(i), accumulated in _Complex float. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks its vector backwards, so the first element */
+	/* used then sits at offset (1-n)*inc (reference BLAS rule), not offset 0. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i=0;i<n;i++) { /* acc = acc + conjg(x(i))*y(i) */
+		acc += conjf(Cf(&x[kx + i*incx])) * Cf(&y[ky + i*incy]);
+	}
+	/* n <= 0 skips the loop, so *z is set to zero. */
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum over i of conjg(x(i))*y(i), accumulated in _Complex double. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks its vector backwards, so the first element */
+	/* used then sits at offset (1-n)*inc (reference BLAS rule), not offset 0. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i=0;i<n;i++) { /* acc = acc + conjg(x(i))*y(i) */
+		acc += conj(Cd(&x[kx + i*incx])) * Cd(&y[ky + i*incy]);
+	}
+	/* n <= 0 skips the loop, so *z is set to zero. */
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum over i of x(i)*y(i), accumulated in _Complex float. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks its vector backwards, so the first element */
+	/* used then sits at offset (1-n)*inc (reference BLAS rule), not offset 0. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i=0;i<n;i++) { /* acc = acc + x(i)*y(i) -- no conjugation here */
+		acc += Cf(&x[kx + i*incx]) * Cf(&y[ky + i*incy]);
+	}
+	/* n <= 0 skips the loop, so *z is set to zero. */
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum over i of x(i)*y(i), accumulated in _Complex double. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment walks its vector backwards, so the first element */
+	/* used then sits at offset (1-n)*inc (reference BLAS rule), not offset 0. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i=0;i<n;i++) { /* acc = acc + x(i)*y(i) -- no conjugation here */
+		acc += Cd(&x[kx + i*incx]) * Cd(&y[ky + i*incy]);
+	}
+	/* n <= 0 skips the loop, so *z is set to zero. */
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1; /* the integer 1, passed by address to dlarfg_, dgemv_, dger_ and dtrmv_ below */
+static doublereal c_b5 = 1.; /* the scalar 1.0 passed to the first dgemv_ call */
+static doublereal c_b7 = 0.; /* the scalar 0.0 passed to both dgemv_ calls */
+
+/* > \brief \b DGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY re
+presentation of Q. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQRT2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrt2
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrt2
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrt2
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGEQRT2( M, N, A, LDA, T, LDT, INFO ) */
+
+/*       INTEGER   INFO, LDA, LDT, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), T( LDT, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQRT2 computes a QR factorization of a real M-by-N matrix A, */
+/* > using the compact WY representation of Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
+/* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
+/* >          elements below the diagonal are the columns of V.  See below for */
+/* >          further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,N) */
+/* >          The N-by-N upper triangular factor of the block reflector. */
+/* >          The elements on and above the diagonal contain the block */
+/* >          reflector T; the elements below the diagonal are not used. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
+/* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1       ) */
+/* >                   ( v1  1    ) */
+/* >                   ( v1 v2  1 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
+/* >  block reflector H is then given by */
+/* > */
+/* >               H = I - V * T * V**T */
+/* > */
+/* >  where V**T is the transpose of V. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqrt2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *t, integer *ldt, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer i__, k;
+    doublereal alpha;
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *), dtrmv_(char *, 
+	    char *, char *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), dlarfg_(integer *, doublereal 
+	    *, doublereal *, integer *, doublereal *), xerbla_(char *, 
+	    integer *, ftnlen);
+    doublereal aii;
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*     Local change: M < N is now rejected with INFO = -1 (the documented requirement is M >= N). */
+
+/*  ===================================================================== */
+/*  Pass 1 generates the reflectors and applies each to the trailing columns; pass 2 builds T column by column. */
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: shift the base pointers so the 1-based indices a[i + j*a_dim1], t[i + j*t_dim1] can be used below */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*m < *n) {
+	*info = -1;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*ldt < f2cmax(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQRT2", &i__1, (ftnlen)7);
+	return 0;
+    }
+/*     With M >= N enforced above, k equals N and both loops below stay within rows 1..M. */
+    k = f2cmin(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1]
+		, &c__1, &t[i__ + t_dim1]);
+	if (i__ < *n) {
+
+/*           Apply H(i) to A(I:M,I+1:N) from the left */
+/*           (the diagonal entry is saved in aii and set to 1 while column I is used as the reflector vector) */
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.;
+
+/*           W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    dgemv_("T", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], 
+		    lda, &a[i__ + i__ * a_dim1], &c__1, &c_b7, &t[*n * t_dim1 
+		    + 1], &c__1);
+
+/*           A(I:M,I+1:N) = A(I:M,I+1:N) + alpha*A(I:M,I)*W(1:N-I)^H */
+
+	    alpha = -t[i__ + t_dim1];
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    dger_(&i__2, &i__3, &alpha, &a[i__ + i__ * a_dim1], &c__1, &t[*n *
+		     t_dim1 + 1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+    }
+/*     Second pass: build column I of T from tau(I), which pass 1 left in T(I,1). */
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	aii = a[i__ + i__ * a_dim1];
+	a[i__ + i__ * a_dim1] = 1.;
+
+/*        T(1:I-1,I) := alpha * A(I:M,1:I-1)**T * A(I:M,I) */
+
+	alpha = -t[i__ + t_dim1];
+	i__2 = *m - i__ + 1;
+	i__3 = i__ - 1;
+	dgemv_("T", &i__2, &i__3, &alpha, &a[i__ + a_dim1], lda, &a[i__ + i__ 
+		* a_dim1], &c__1, &c_b7, &t[i__ * t_dim1 + 1], &c__1);
+	a[i__ + i__ * a_dim1] = aii;
+
+/*        T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) */
+
+	i__2 = i__ - 1;
+	dtrmv_("U", "N", "N", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], 
+		&c__1);
+
+/*           T(I,I) = tau(I) */
+/*           (the first-column slot that held tau(I) is then cleared) */
+	t[i__ + i__ * t_dim1] = t[i__ + t_dim1];
+	t[i__ + t_dim1] = 0.;
+    }
+
+/*     End of DGEQRT2 */
+
+    return 0;
+} /* dgeqrt2_ */
+
diff --git a/lapack-netlib/SRC/dgeqrt3.c b/lapack-netlib/SRC/dgeqrt3.c
new file mode 100644
index 000000000..f0f8299b8
--- /dev/null
+++ b/lapack-netlib/SRC/dgeqrt3.c
@@ -0,0 +1,681 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex; /* single-precision pair: r = real part, i = imaginary part (see Cf) */
+typedef struct { doublereal r, i; } doublecomplex; /* double-precision pair, same field meaning (see Cd) */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* pointer cast only; NOTE(review): assumes {r,i} is laid out like _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* pointer cast only; same layout assumption for double */
+#define pCf(z) (*_pCf(z)) /* lvalue through the cast pointer, e.g. pCf(z) = value; */
+#define pCd(z) (*_pCd(z)) /* double-precision counterpart of pCf */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* macro form; x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* one argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* one argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* uses the double-precision ccos, unlike the *f calls below */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* goes through the double-precision conj, then narrows on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* result has type double (cimag, not cimagf) */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncation toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* round half away from zero; __x is evaluated twice */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| when b >= 0, otherwise -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is passed as *(B), not Cd(B) like A -- confirm against the intended argument type */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars, stops at a NUL in B, adds no padding */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a braced expression with no ';' -- not usable as a value or a statement as written */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): same braced-expression form as myceiling */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): same braced-expression form; always selects the double-precision dmaxloc_ */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++: pointer to a function returning logical, any arguments */
+#else
+typedef logical (*L_fp)();	/* C: pointer to a function returning logical, parameter list unspecified */
+#endif
+
+/* x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* Complex x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* Complex x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* Integer power x**n. n == 0 gives 1; for n < 0 the result is 1 when x == 1, (-1)**|n| when x == -1, */
+/* 0 for any other nonzero x, and the integer division 1/x is still evaluated when x == 0. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1;
+	unsigned long int bits;
+	if (n == 0 || (n < 0 && x == 1)) return 1;
+	if (n < 0 && x != -1) return x == 0 ? 1/x : 0;
+	if (n < 0) n = -n;	/* only x == -1 gets here with a negative exponent */
+	bits = n;
+	for (;;) {	/* repeated squaring; n != 0 here, so the first pass always runs */
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* z = sum_i conjg(x_i) * y_i over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* z = sum_i conjg(x_i) * y_i over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+/* z = sum_i x_i * y_i (no conjugation) over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* z = sum_i x_i * y_i (no conjugation) over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;	/* integer 1, passed by address to dlarfg_ */
+static doublereal c_b8 = 1.;	/* scalar 1.0, passed by address as alpha/beta to dtrmm_ and dgemm_ */
+static doublereal c_b20 = -1.;	/* scalar -1.0, passed by address as alpha to dgemm_ and dtrmm_ */
+
+/* > \brief \b DGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGEQRT3 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgeqrt3.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgeqrt3.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgeqrt3.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*        SUBROUTINE DGEQRT3( M, N, A, LDA, T, LDT, INFO ) */
+
+/*       INTEGER   INFO, LDA, M, N, LDT */
+/*       DOUBLE PRECISION   A( LDA, * ), T( LDT, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGEQRT3 recursively computes a QR factorization of a real M-by-N */
+/* > matrix A, using the compact WY representation of Q. */
+/* > */
+/* > Based on the algorithm of Elmroth and Gustavson, */
+/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the real M-by-N matrix A.  On exit, the elements on and */
+/* >          above the diagonal contain the N-by-N upper triangular matrix R; the */
+/* >          elements below the diagonal are the columns of V.  See below for */
+/* >          further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] T */
+/* > \verbatim */
+/* >          T is DOUBLE PRECISION array, dimension (LDT,N) */
+/* >          The N-by-N upper triangular factor of the block reflector. */
+/* >          The elements on and above the diagonal contain the block */
+/* >          reflector T; the elements below the diagonal are not used. */
+/* >          See below for further details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDT */
+/* > \verbatim */
+/* >          LDT is INTEGER */
+/* >          The leading dimension of the array T.  LDT >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix V stores the elementary reflectors H(i) in the i-th column */
+/* >  below the diagonal. For example, if M=5 and N=3, the matrix V is */
+/* > */
+/* >               V = (  1       ) */
+/* >                   ( v1  1    ) */
+/* >                   ( v1 v2  1 ) */
+/* >                   ( v1 v2 v3 ) */
+/* >                   ( v1 v2 v3 ) */
+/* > */
+/* >  where the vi's represent the vectors which define H(i), which are returned */
+/* >  in the matrix A.  The 1's along the diagonal of V are not stored in A.  The */
+/* >  block reflector H is then given by */
+/* > */
+/* >               H = I - V * T * V**T */
+/* > */
+/* >  where V**T is the transpose of V. */
+/* > */
+/* >  For details of the algorithm, see Elmroth and Gustavson (cited above). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgeqrt3_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *t, integer *ldt, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2;
+
+    /* Local variables */
+    integer i__, j;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer iinfo;	/* INFO of the recursive calls; never examined */
+    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer i1, j1, n1, n2;
+    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
+	     integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+/*  Recursive scheme: factor the left N/2 columns, apply Q1^H to the remaining columns, */
+/*  factor the trailing block, then form the coupling block T3 = -T1 Y1^H Y2 T2 of T. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+/*  a and t are shifted below so that a[i + j*a_dim1] and t[i + j*t_dim1] take 1-based (i,j). */
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1 * 1;
+    t -= t_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -2;
+    } else if (*m < *n) {
+	*info = -1;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    } else if (*ldt < f2cmax(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQRT3", &i__1, (ftnlen)7);
+	return 0;
+    }
+    if (*n == 0) return 0;	/* no columns: the split below would give n1 = 0 and recurse on N = 0 forever */
+    if (*n == 1) {
+
+/*        Compute Householder transform when N=1 */
+
+	dlarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1, &t[t_dim1 + 
+		1]);
+
+    } else {
+
+/*        Otherwise, split A into blocks... */
+
+	n1 = *n / 2;
+	n2 = *n - n1;
+/* Computing MIN */
+	i__1 = n1 + 1;
+	j1 = f2cmin(i__1,*n);
+/* Computing MIN */
+	i__1 = *n + 1;
+	i1 = f2cmin(i__1,*m);
+
+/*        Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */
+
+	dgeqrt3_(m, &n1, &a[a_offset], lda, &t[t_offset], ldt, &iinfo);
+
+/*        Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] */
+
+	i__1 = n2;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = n1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		t[i__ + (j + n1) * t_dim1] = a[i__ + (j + n1) * a_dim1];
+	    }
+	}
+	dtrmm_("L", "L", "T", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 *
+		 t_dim1 + 1], ldt)
+		;
+
+	i__1 = *m - n1;
+	dgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[j1 + a_dim1], lda, &a[j1 
+		+ j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);
+
+	dtrmm_("L", "U", "T", "N", &n1, &n2, &c_b8, &t[t_offset], ldt, &t[j1 *
+		 t_dim1 + 1], ldt)
+		;
+
+	i__1 = *m - n1;
+	dgemm_("N", "N", &i__1, &n2, &n1, &c_b20, &a[j1 + a_dim1], lda, &t[j1 
+		* t_dim1 + 1], ldt, &c_b8, &a[j1 + j1 * a_dim1], lda);
+
+	dtrmm_("L", "L", "N", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 *
+		 t_dim1 + 1], ldt)
+		;
+
+	i__1 = n2;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = n1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + (j + n1) * a_dim1] -= t[i__ + (j + n1) * t_dim1];
+	    }
+	}
+
+/*        Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */
+
+	i__1 = *m - n1;
+	dgeqrt3_(&i__1, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1], 
+		ldt, &iinfo);
+
+/*        Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 */
+
+	i__1 = n1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = n2;
+	    for (j = 1; j <= i__2; ++j) {
+		t[i__ + (j + n1) * t_dim1] = a[j + n1 + i__ * a_dim1];
+	    }
+	}
+
+	dtrmm_("R", "L", "N", "U", &n1, &n2, &c_b8, &a[j1 + j1 * a_dim1], lda,
+		 &t[j1 * t_dim1 + 1], ldt);
+
+	i__1 = *m - *n;
+	dgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[i1 + a_dim1], lda, &a[i1 
+		+ j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt);
+
+	dtrmm_("L", "U", "N", "N", &n1, &n2, &c_b20, &t[t_offset], ldt, &t[j1 
+		* t_dim1 + 1], ldt);
+
+	dtrmm_("R", "U", "N", "N", &n1, &n2, &c_b8, &t[j1 + j1 * t_dim1], ldt,
+		 &t[j1 * t_dim1 + 1], ldt);
+
+/*        Y = (Y1,Y2); R = [ R1  A(1:N1,J1:N) ];  T = [T1 T3] */
+/*                         [  0        R2     ]       [ 0 T2] */
+
+    }
+
+    return 0;
+
+/*     End of DGEQRT3 */
+
+} /* dgeqrt3_ */
+
diff --git a/lapack-netlib/SRC/dgerfs.c b/lapack-netlib/SRC/dgerfs.c
new file mode 100644
index 000000000..83be100ab
--- /dev/null
+++ b/lapack-netlib/SRC/dgerfs.c
@@ -0,0 +1,882 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* C types standing in for the Fortran scalar types used by the translated code */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single-precision complex as a (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex as a (real, imaginary) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed as _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed as _Complex double */
+#define pCf(z) (*_pCf(z))	/* lvalue: lets a macro assign a C99 complex into a complex struct */
+#define pCd(z) (*_pCd(z))	/* lvalue: lets a macro assign a C99 complex into a doublecomplex struct */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: control blocks from the standard f2c header, one per kind of Fortran I/O statement */
+
+typedef int flag;
+typedef int ftnlen;	/* also the type of the trailing length argument in the xerbla_ prototype below */
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* libf2c runtime intrinsics as macros: pointer arguments are dereferenced inside each expansion. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } /* conjf: the single-precision value stays in float */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* B is a doublecomplex pointer like A; cpow needs _Complex double, not the struct */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* myceiling, myhuge and mymaxloc expand to parenthesised expressions: a braced body such as {ceil(w)} is neither a usable value nor a valid statement. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++: pointer to a function returning logical, any arguments */
+#else
+typedef logical (*L_fp)();	/* C: pointer to a function returning logical, parameter list unspecified */
+#endif
+
+/* x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* Complex x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* Complex x**n for an integer exponent by repeated squaring; n < 0 raises 1/x to |n|, n == 0 gives 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value overflows */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) acc *= x;	/* this binary digit of |n| is set */
+		if (bits >> 1) x *= x;		/* square only while higher digits remain */
+	}
+	return acc;
+}
+/* Integer power x**n. n == 0 gives 1; for n < 0 the result is 1 when x == 1, (-1)**|n| when x == -1, */
+/* 0 for any other nonzero x, and the integer division 1/x is still evaluated when x == 0. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1;
+	unsigned long int bits;
+	if (n == 0 || (n < 0 && x == 1)) return 1;
+	if (n < 0 && x != -1) return x == 0 ? 1/x : 0;
+	if (n < 0) n = -n;	/* only x == -1 gets here with a negative exponent */
+	bits = n;
+	for (;;) {	/* repeated squaring; n != 0 here, so the first pass always runs */
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Position, counted from 1 at index s, of the first largest entry among w(s..e) (1-based); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* z = sum_i conjg(x_i) * y_i over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* z = sum_i conjg(x_i) * y_i over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+/* z = sum_i x_i * y_i (no conjugation) over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = acc;
+}
+/* z = sum_i x_i * y_i (no conjugation) over n elements of x and y taken with strides incx and incy. */
+/* n <= 0 leaves the sum at zero. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS convention: a negative stride starts (n-1)*|inc| elements in and walks */
+	/* backwards; indexing x[i*inc] from 0 would step in front of the array instead. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;	/* integer constant 1; NOTE(review): presumably passed by address inside dgerfs_ - confirm */
+static doublereal c_b15 = -1.;	/* double constant -1.0; NOTE(review): use sites are in dgerfs_, not checked here */
+static doublereal c_b17 = 1.;	/* double constant 1.0; NOTE(review): use sites are in dgerfs_, not checked here */
+
+/* > \brief \b DGERFS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGERFS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgerfs.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgerfs.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgerfs.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */
+/*                          X, LDX, FERR, BERR, WORK, IWORK, INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGERFS improves the computed solution to a system of linear */
+/* > equations and provides error bounds and backward error estimates for */
+/* > the solution. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          The original N-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AF */
+/* > \verbatim */
+/* >          AF is DOUBLE PRECISION array, dimension (LDAF,N) */
+/* >          The factors L and U from the factorization A = P*L*U */
+/* >          as computed by DGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >          The leading dimension of the array AF.  LDAF >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices from DGETRF; for 1<=i<=N, row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          The right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >          On entry, the solution matrix X, as computed by DGETRS. */
+/* >          On exit, the improved solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= f2cmax(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (3*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/* > \par Internal Parameters: */
+/*  ========================= */
+/* > */
+/* > \verbatim */
+/* >  ITMAX is the maximum number of steps of iterative refinement. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgerfs_(char *trans, integer *n, integer *nrhs, 
+	doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *
+	ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, 
+	doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, 
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, i__1, i__2, i__3;
+    doublereal d__1, d__2, d__3;
+
+    /* Local variables */
+    integer kase;
+    doublereal safe1, safe2;
+    integer i__, j, k;
+    doublereal s;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *);
+    integer isave[3];
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *), daxpy_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *);
+    integer count;
+    extern /* Subroutine */ int dlacn2_(integer *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *);
+    doublereal xk;
+    integer nz;
+    doublereal safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dgetrs_(
+	    char *, integer *, integer *, doublereal *, integer *, integer *, 
+	    doublereal *, integer *, integer *);
+    logical notran;
+    char transt[1];
+    doublereal lstres, eps;
+/*  Body: for each right-hand side j, refine X(:,j) against op(A)*X = B, then set BERR(j) and FERR(j). */
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments: rebase the pointers so every array is indexed from 1 */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -5;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -7;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -10;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGERFS", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0 || *nrhs == 0) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    ferr[j] = 0.;
+	    berr[j] = 0.;
+/* L10: */
+	}
+	return 0;
+    }
+/*     TRANST is the opposite of TRANS ('T' when TRANS is 'N', else 'N'); used for the KASE = 1 solves. */
+    if (notran) {
+	*(unsigned char *)transt = 'T';
+    } else {
+	*(unsigned char *)transt = 'N';
+    }
+
+/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */
+
+    nz = *n + 1;
+    eps = dlamch_("Epsilon");
+    safmin = dlamch_("Safe minimum");
+    safe1 = nz * safmin;
+    safe2 = safe1 / eps;
+
+/*     Do for each right hand side */
+
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+/*        COUNT numbers the refinement pass; LSTRES is BERR(J) of the previous pass (seeded with 3). */
+	count = 1;
+	lstres = 3.;
+L20:
+
+/*        Loop until stopping criterion is satisfied. */
+
+/*        Compute residual R = B - op(A) * X, */
+/*        where op(A) = A, A**T, or A**H, depending on TRANS. */
+
+	dcopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
+	dgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], &
+		c__1, &c_b17, &work[*n + 1], &c__1);
+
+/*        Compute componentwise relative backward error from formula */
+
+/*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */
+
+/*        where abs(Z) is the componentwise absolute value of the matrix */
+/*        or vector Z.  If the i-th component of the denominator is less */
+/*        than SAFE2, then SAFE1 is added to the i-th components of the */
+/*        numerator and denominator before dividing. */
+
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1));
+/* L30: */
+	}
+
+/*        Compute abs(op(A))*abs(X) + abs(B). */
+
+	if (notran) {
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		xk = (d__1 = x[k + j * x_dim1], abs(d__1));
+		i__3 = *n;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    work[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * xk;
+/* L40: */
+		}
+/* L50: */
+	    }
+	} else {
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		s = 0.;
+		i__3 = *n;
+		for (i__ = 1; i__ <= i__3; ++i__) {
+		    s += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = x[
+			    i__ + j * x_dim1], abs(d__2));
+/* L60: */
+		}
+		work[k] += s;
+/* L70: */
+	    }
+	}
+	s = 0.;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+/* Computing MAX */
+		d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[
+			i__];
+		s = f2cmax(d__2,d__3);
+	    } else {
+/* Computing MAX */
+		d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) 
+			/ (work[i__] + safe1);
+		s = f2cmax(d__2,d__3);
+	    }
+/* L80: */
+	}
+	berr[j] = s;
+
+/*        Test stopping criterion. Continue iterating if */
+/*           1) The residual BERR(J) is larger than machine epsilon, and */
+/*           2) BERR(J) decreased by at least a factor of 2 during the */
+/*              last iteration, and */
+/*           3) At most ITMAX iterations tried. */
+
+	if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) {
+/*           (the literal 5 above is ITMAX) */
+/*           Update solution and try again. */
+/*           DGETRS solves in place in WORK(N+1..2N); DAXPY then folds the result into X(:,j). */
+	    dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n 
+		    + 1], n, info);
+	    daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
+		    ;
+	    lstres = berr[j];
+	    ++count;
+	    goto L20;
+	}
+
+/*        Bound error from formula */
+
+/*        norm(X - XTRUE) / norm(X) .le. FERR = */
+/*        norm( abs(inv(op(A)))* */
+/*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */
+
+/*        where */
+/*          norm(Z) is the magnitude of the largest component of Z */
+/*          inv(op(A)) is the inverse of op(A) */
+/*          abs(Z) is the componentwise absolute value of the matrix or */
+/*             vector Z */
+/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
+/*          EPS is machine epsilon */
+
+/*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
+/*        is incremented by SAFE1 if the i-th component of */
+/*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */
+
+/*        Use DLACN2 to estimate the infinity-norm of the matrix */
+/*           inv(op(A)) * diag(W), */
+/*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
+
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    if (work[i__] > safe2) {
+		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
+			work[i__];
+	    } else {
+		work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * 
+			work[i__] + safe1;
+	    }
+/* L90: */
+	}
+/*        DLACN2 is re-entered until it hands back KASE = 0; FERR(J) receives its estimate. */
+	kase = 0;
+L100:
+	dlacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
+		kase, isave);
+	if (kase != 0) {
+	    if (kase == 1) {
+
+/*              Multiply by diag(W)*inv(op(A)**T). */
+
+		dgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
+			work[*n + 1], n, info);
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] = work[i__] * work[*n + i__];
+/* L110: */
+		}
+	    } else {
+
+/*              Multiply by inv(op(A))*diag(W). */
+
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    work[*n + i__] = work[i__] * work[*n + i__];
+/* L120: */
+		}
+		dgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
+			work[*n + 1], n, info);
+	    }
+	    goto L100;
+	}
+
+/*        Normalize error. */
+/*        LSTRES is reused for the largest abs(X(i,j)); an all-zero column skips the division. */
+	lstres = 0.;
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+	    d__2 = lstres, d__3 = (d__1 = x[i__ + j * x_dim1], abs(d__1));
+	    lstres = f2cmax(d__2,d__3);
+/* L130: */
+	}
+	if (lstres != 0.) {
+	    ferr[j] /= lstres;
+	}
+
+/* L140: */
+    }
+
+    return 0;
+
+/*     End of DGERFS */
+
+} /* dgerfs_ */
+
diff --git a/lapack-netlib/SRC/dgerfsx.c b/lapack-netlib/SRC/dgerfsx.c
new file mode 100644
index 000000000..7610f487e
--- /dev/null
+++ b/lapack-netlib/SRC/dgerfsx.c
@@ -0,0 +1,1148 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* r = real part, i = imaginary part (see Cf) */
+typedef struct { doublereal r, i; } doublecomplex;	/* same fields in double precision (see Cd) */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only, no copy */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast only, no copy */
+#define pCf(z) (*_pCf(z))	/* *z viewed as an assignable _Complex float */
+#define pCd(z) (*_pCd(z))	/* *z viewed as an assignable _Complex double */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; most char* members are followed by a length member */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type declared in this header */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* (no longint member: this header does not define a longint type) */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* type-generic; evaluates x twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* evaluates the selected argument twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* evaluates the selected argument twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Runtime helpers expressed as macros; most take POINTERS to their operands and dereference them. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+/* c_sqrt stores through pCf(R): R points at a complex struct, which cannot be assigned a _Complex value directly. */
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }	/* conjf: same value, no trip through double */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))	/* parenthesised so the result can be assigned or used in an expression */
+#define myhuge(w) (HUGE_VAL)	/* the argument is ignored */
+/* mymaxloc always dispatches to dmaxloc_ (double data); smaxloc_ is the float counterpart and is not selected here. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++: pointer to a logical-valued function taking any arguments */
+#else
+typedef logical (*L_fp)();	/* C: same, with the parameter list left unspecified */
+#endif
+
+static float spow_ui(float x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	float result = 1.0f; unsigned long int bits = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) bits = 0UL - bits, x = 1/x; /* |n| taken unsigned: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) { /* x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	double result = 1.0; unsigned long int bits = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) bits = 0UL - bits, x = 1/x; /* |n| taken unsigned: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* complex x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex float result = 1.0; unsigned long int bits = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) bits = 0UL - bits, x = 1/x; /* |n| taken unsigned: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* complex x**n by repeated squaring; n < 0 gives (1/x)**|n| */
+	_Complex double result = 1.0; unsigned long int bits = (unsigned long int)n;
+	if(n != 0) {
+		if(n < 0) bits = 0UL - bits, x = 1/x; /* |n| taken unsigned: "n = -n" overflows for the most negative n */
+		for( ; ; ) {
+			if(bits & 01) result *= x;
+			if(bits >>= 1) x *= x;
+			else break;
+		}
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) { /* integer x**n; for n < 0 the quotient 1/x**|n| is truncated */
+	integer result = 1; unsigned long int bits = (unsigned long int)n;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* |x| > 1 gives 0; x == 0 still evaluates 1/x (integer division by zero), */
+		/* exactly as before, so 0**negative is not silently turned into a value. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		/* x == -1: go on with |n|, negated as unsigned because "n = -n" overflows for the most negative n. */
+		bits = 0UL - bits;
+	}
+	for( ; ; ) {
+		if(bits & 01) result *= x;
+		if(bits >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used. */
+	double best = w[s-1]; integer k = s, at = s;
+	while (++k <= e)
+		if (w[k-1] > best) { best = w[k-1]; at = k; }
+	return at - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used. */
+	float best = w[s-1]; integer k = s, at = s;
+	while (++k <= e)
+		if (w[k-1] > best) { best = w[k-1]; at = k; }
+	return at - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of conjg(x(i)) * y(i), n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += conjf(Cf(&x[i])) * Cf(&y[i]);
+	} else {
+		/* Reference BLAS starts a negative-stride vector at its far end; rebase so no index goes below the array. */
+		if (n > 0 && incx < 0) x += (1 - n) * incx;
+		if (n > 0 && incy < 0) y += (1 - n) * incy;
+		for (i=0;i<n;i++) acc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of conjg(x(i)) * y(i), n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += conj(Cd(&x[i])) * Cd(&y[i]);
+	} else {
+		/* Reference BLAS starts a negative-stride vector at its far end; rebase so no index goes below the array. */
+		if (n > 0 && incx < 0) x += (1 - n) * incx;
+		if (n > 0 && incy < 0) y += (1 - n) * incy;
+		for (i=0;i<n;i++) acc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i) (no conjugation), n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += Cf(&x[i]) * Cf(&y[i]);
+	} else {
+		/* Reference BLAS starts a negative-stride vector at its far end; rebase so no index goes below the array. */
+		if (n > 0 && incx < 0) x += (1 - n) * incx;
+		if (n > 0 && incy < 0) y += (1 - n) * incy;
+		for (i=0;i<n;i++) acc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i) (no conjugation), n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double acc = 0.0;
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) acc += Cd(&x[i]) * Cd(&y[i]);
+	} else {
+		/* Reference BLAS starts a negative-stride vector at its far end; rebase so no index goes below the array. */
+		if (n > 0 && incx < 0) x += (1 - n) * incx;
+		if (n > 0 && incy < 0) y += (1 - n) * incy;
+		for (i=0;i<n;i++) acc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;	/* NOTE(review): presumably passed by address to callees taking integer *; uses lie outside this excerpt */
+static integer c__0 = 0;
+static integer c__1 = 1;
+
+/* > \brief \b DGERFSX */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGERFSX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgerfsx.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgerfsx.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgerfsx.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, */
+/*                           R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, */
+/*                           ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, */
+/*                           WORK, IWORK, INFO ) */
+
+/*       CHARACTER          TRANS, EQUED */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */
+/*      $                   N_ERR_BNDS */
+/*       DOUBLE PRECISION   RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   X( LDX , * ), WORK( * ) */
+/*       DOUBLE PRECISION   R( * ), C( * ), PARAMS( * ), BERR( * ), */
+/*      $                   ERR_BNDS_NORM( NRHS, * ), */
+/*      $                   ERR_BNDS_COMP( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    DGERFSX improves the computed solution to a system of linear */
+/* >    equations and provides error bounds and backward error estimates */
+/* >    for the solution.  In addition to normwise error bound, the code */
+/* >    provides maximum componentwise error bound if possible.  See */
+/* >    comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */
+/* >    error bounds. */
+/* > */
+/* >    The original system of linear equations may have been equilibrated */
+/* >    before calling this routine, as described by arguments EQUED, R */
+/* >    and C below. In this case, the solution and error bounds returned */
+/* >    are for the original unequilibrated system. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \verbatim */
+/* >     Some optional parameters are bundled in the PARAMS array.  These */
+/* >     settings determine how refinement is performed, but often the */
+/* >     defaults are acceptable.  If the defaults are acceptable, users */
+/* >     can pass NPARAMS = 0 which prevents the source code from accessing */
+/* >     the PARAMS argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >     Specifies the form of equilibration that was done to A */
+/* >     before calling this routine. This is needed to compute */
+/* >     the solution and error bounds correctly. */
+/* >       = 'N':  No equilibration */
+/* >       = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >               diag(R). */
+/* >       = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >               by diag(C). */
+/* >       = 'B':  Both row and column equilibration, i.e., A has been */
+/* >               replaced by diag(R) * A * diag(C). */
+/* >               The right hand side B has been changed accordingly. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right hand sides, i.e., the number of columns */
+/* >     of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >     The original N-by-N matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >     The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] AF */
+/* > \verbatim */
+/* >          AF is DOUBLE PRECISION array, dimension (LDAF,N) */
+/* >     The factors L and U from the factorization A = P*L*U */
+/* >     as computed by DGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >     The leading dimension of the array AF.  LDAF >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     The pivot indices from DGETRF; for 1<=i<=N, row i of the */
+/* >     matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (N) */
+/* >     The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >     multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >     is not accessed. */
+/* >     If R is accessed, each element of R should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >     The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >     multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >     is not accessed. */
+/* >     If C is accessed, each element of C should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >     The right hand side matrix B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >     On entry, the solution matrix X, as computed by DGETRS. */
+/* >     On exit, the improved solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >     The leading dimension of the array X.  LDX >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >     Componentwise relative backward error.  This is the */
+/* >     componentwise relative backward error of each solution vector X(j) */
+/* >     (i.e., the smallest relative change in any element of A or B that */
+/* >     makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_ERR_BNDS */
+/* > \verbatim */
+/* >          N_ERR_BNDS is INTEGER */
+/* >     Number of error bounds to return for each right hand side */
+/* >     and each type (normwise or componentwise).  See ERR_BNDS_NORM and */
+/* >     ERR_BNDS_COMP below. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_NORM */
+/* > \verbatim */
+/* >          ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is not less than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_COMP */
+/* > \verbatim */
+/* >          ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is not less than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NPARAMS */
+/* > \verbatim */
+/* >          NPARAMS is INTEGER */
+/* >     Specifies the number of parameters set in PARAMS.  If <= 0, the */
+/* >     PARAMS array is never referenced and default values are used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] PARAMS */
+/* > \verbatim */
+/* >          PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */
+/* >     Specifies algorithm parameters.  If an entry is < 0.0, then */
+/* >     that entry will be filled with default value used for that */
+/* >     parameter.  Only positions up to NPARAMS are accessed; defaults */
+/* >     are used for higher-numbered parameters. */
+/* > */
+/* >       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */
+/* >            refinement or not. */
+/* >         Default: 1.0D+0 */
+/* >            = 0.0:  No refinement is performed, and no error bounds are */
+/* >                    computed. */
+/* >            = 1.0:  Use the double-precision refinement algorithm, */
+/* >                    possibly with doubled-single computations if the */
+/* >                    compilation environment does not support DOUBLE */
+/* >                    PRECISION. */
+/* >              (other values are reserved for future use) */
+/* > */
+/* >       PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */
+/* >            computations allowed for refinement. */
+/* >         Default: 10 */
+/* >         Aggressive: Set to 100 to permit convergence using approximate */
+/* >                     factorizations or factorizations other than LU. If */
+/* >                     the factorization uses a technique other than */
+/* >                     Gaussian elimination, the guarantees in */
+/* >                     err_bnds_norm and err_bnds_comp may no longer be */
+/* >                     trustworthy. */
+/* > */
+/* >       PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */
+/* >            will attempt to find a solution with small componentwise */
+/* >            relative error in the double-precision algorithm.  Positive */
+/* >            is true, 0.0 is false. */
+/* >         Default: 1.0 (attempt componentwise convergence) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. The solution to every right-hand side is */
+/* >         guaranteed. */
+/* >       < 0:  If INFO = -i, the i-th argument had an illegal value */
+/* >       > 0 and <= N:  U(INFO,INFO) is exactly zero.  The factorization */
+/* >         has been completed, but the factor U is exactly singular, so */
+/* >         the solution and error bounds could not be computed. RCOND = 0 */
+/* >         is returned. */
+/* >       = N+J: The solution corresponding to the Jth right-hand side is */
+/* >         not guaranteed. The solutions corresponding to other right- */
+/* >         hand sides K with K > J may not be guaranteed as well, but */
+/* >         only the first such right-hand side is reported. If a small */
+/* >         componentwise error is not requested (PARAMS(3) = 0.0) then */
+/* >         the Jth right-hand side is the first with a normwise error */
+/* >         bound that is not guaranteed (the smallest J such */
+/* >         that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */
+/* >         the Jth right-hand side is the first with either a normwise or */
+/* >         componentwise error bound that is not guaranteed (the smallest */
+/* >         J such that either ERR_BNDS_NORM(J,1) = 0.0 or */
+/* >         ERR_BNDS_COMP(J,1) = 0.0). See the definition of */
+/* >         ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */
+/* >         about all of the right-hand sides check ERR_BNDS_NORM or */
+/* >         ERR_BNDS_COMP. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgerfsx_(char *trans, char *equed, integer *n, integer *
+	nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, 
+	integer *ipiv, doublereal *r__, doublereal *c__, doublereal *b, 
+	integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, 
+	doublereal *berr, integer *n_err_bnds__, doublereal *err_bnds_norm__, 
+	doublereal *err_bnds_comp__, integer *nparams, doublereal *params, 
+	doublereal *work, integer *iwork, integer *info)
+{
+    /* f2c-generated locals: array strides/offsets and expression temporaries */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, 
+	    err_bnds_comp_dim1, err_bnds_comp_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal illrcond_thresh__, unstable_thresh__, err_lbnd__;
+    char norm[1];
+    integer ref_type__;
+    extern integer ilatrans_(char *);
+    logical ignore_cwise__;
+    integer j;
+    extern logical lsame_(char *, char *);
+    doublereal anorm, rcond_tmp__;
+    integer prec_type__;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgecon_(char *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, integer *, 
+	    integer *), xerbla_(char *, integer *, ftnlen);
+    logical colequ, notran, rowequ;
+    integer trans_type__;
+    extern integer ilaprec_(char *);
+    extern doublereal dla_gercond_(char *, integer *, doublereal *, integer *
+	    , doublereal *, integer *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *);
+    integer ithresh, n_norms__;
+    doublereal rthresh, cwise_wrong__;
+    extern /* Subroutine */ int dla_gerfsx_extended_(integer *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, integer *, logical *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, doublereal *, doublereal *, 
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     doublereal *, logical *, integer *);
+
+/*  DGERFSX: estimates RCOND with dgecon_, delegates the refinement of X to */
+/*  dla_gerfsx_extended_, then thresholds and stores the error bounds per right-hand side. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*  Always returns 0; the status is reported through *info (see INFO above). */
+/*  Array arguments are rebased below so they can be indexed from 1, Fortran style. */
+/*  PARAMS is read only at positions <= *nparams; entries that are not supplied use */
+/*  the defaults (refinement on, ithresh = 10, componentwise bounds requested). */
+
+/*     Rebase the array arguments, read PARAMS, then check the inputs. */
+
+    /* Parameter adjustments */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --berr;
+    --params;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    trans_type__ = ilatrans_(trans);
+    ref_type__ = 1;
+    if (*nparams >= 1) {
+	if (params[1] < 0.) {
+	    params[1] = 1.;
+	} else {
+	    ref_type__ = (integer) params[1];
+	}
+    }
+
+/*     Set default parameters. */
+
+    illrcond_thresh__ = (doublereal) (*n) * dlamch_("Epsilon");
+    ithresh = 10;
+    rthresh = .5;
+    unstable_thresh__ = .25;
+    ignore_cwise__ = FALSE_;
+
+    if (*nparams >= 2) {
+	if (params[2] < 0.) {
+	    params[2] = (doublereal) ithresh;
+	} else {
+	    ithresh = (integer) params[2];
+	}
+    }
+    if (*nparams >= 3) {
+	if (params[3] < 0.) {
+	    if (ignore_cwise__) {
+		params[3] = 0.;
+	    } else {
+		params[3] = 1.;
+	    }
+	} else {
+	    ignore_cwise__ = params[3] == 0.;
+	}
+    }
+    if (ref_type__ == 0 || *n_err_bnds__ == 0) {
+	n_norms__ = 0;
+    } else if (ignore_cwise__) {
+	n_norms__ = 1;
+    } else {
+	n_norms__ = 2;
+    }
+
+    notran = lsame_(trans, "N");
+    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
+    colequ = lsame_(equed, "C") || lsame_(equed, "B");
+
+/*     Test input parameters. */
+
+    if (trans_type__ == -1) {
+	*info = -1;
+    } else if (! rowequ && ! colequ && ! lsame_(equed, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -8;
+    } else if (*ldb < f2cmax(1,*n)) {
+	*info = -13;
+    } else if (*ldx < f2cmax(1,*n)) {
+	*info = -15;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGERFSX", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0 || *nrhs == 0) {
+	*rcond = 1.;
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    berr[j] = 0.;
+	    if (*n_err_bnds__ >= 1) {
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+	    }
+	    if (*n_err_bnds__ >= 2) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.;
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.;
+	    }
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.;
+		err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.;
+	    }
+	}
+	return 0;
+    }
+
+/*     Default to failure. */
+
+    *rcond = 0.;
+    i__1 = *nrhs;
+    for (j = 1; j <= i__1; ++j) {
+	berr[j] = 1.;
+	if (*n_err_bnds__ >= 1) {
+	    err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+	    err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+	}
+	if (*n_err_bnds__ >= 2) {
+	    err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+	    err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+	}
+	if (*n_err_bnds__ >= 3) {
+	    err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.;
+	    err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.;
+	}
+    }
+
+/*     Compute the norm of A and the reciprocal of the condition */
+/*     number of A. */
+
+    if (notran) {
+	*(unsigned char *)norm = 'I';
+    } else {
+	*(unsigned char *)norm = '1';
+    }
+    anorm = dlange_(norm, n, n, &a[a_offset], lda, &work[1]);
+    dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1],
+	     info);
+
+/*     Perform refinement on each right-hand side */
+
+    if (ref_type__ != 0) {
+	prec_type__ = ilaprec_("E");
+	if (notran) {
+	    dla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[
+		    a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &colequ, &
+		    c__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], &
+		    n_norms__, &err_bnds_norm__[err_bnds_norm_offset], &
+		    err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], &
+		    work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, 
+		    &rthresh, &unstable_thresh__, &ignore_cwise__, info);
+	} else {
+	    dla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[
+		    a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &rowequ, &
+		    r__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], &
+		    n_norms__, &err_bnds_norm__[err_bnds_norm_offset], &
+		    err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], &
+		    work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, 
+		    &rthresh, &unstable_thresh__, &ignore_cwise__, info);
+	}
+    }
+/* Computing MAX */
+    d__1 = 10., d__2 = sqrt((doublereal) (*n));
+    err_lbnd__ = f2cmax(d__1,d__2) * dlamch_("Epsilon");
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 1) {
+
+/*     Compute scaled normwise condition number cond(A*C). */
+
+	if (colequ && notran) {
+	    rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[
+		    af_offset], ldaf, &ipiv[1], &c_n1, &c__[1], info, &work[1]
+		    , &iwork[1]);
+	} else if (rowequ && ! notran) {
+	    rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[
+		    af_offset], ldaf, &ipiv[1], &c_n1, &r__[1], info, &work[1]
+		    , &iwork[1]);
+	} else {
+	    rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[
+		    af_offset], ldaf, &ipiv[1], &c__0, &r__[1], info, &work[1]
+		    , &iwork[1]);
+	}
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+
+/*     Cap the error at 1.0. */
+
+	    if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 
+		    << 1)] > 1.) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+	    }
+
+/*     Threshold the error (see LAWN). */
+/*     NOTE(review): the column-2 accesses below are not guarded by N_ERR_BNDS >= 2. */
+	    if (rcond_tmp__ < illrcond_thresh__) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.;
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 0.;
+		if (*info <= *n) {
+		    *info = *n + j;
+		}
+	    } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < 
+		    err_lbnd__) {
+		err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__;
+		err_bnds_norm__[j + err_bnds_norm_dim1] = 1.;
+	    }
+
+/*     Save the condition number. */
+
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__;
+	    }
+	}
+    }
+    if (*n_err_bnds__ >= 1 && n_norms__ >= 2) {
+
+/*     Compute componentwise condition number cond(A*diag(Y(:,J))) for */
+/*     each right-hand side using the current solution as an estimate of */
+/*     the true solution.  If the componentwise error estimate is too */
+/*     large, then the solution is a lousy estimate of truth and the */
+/*     estimated RCOND may be too optimistic.  To avoid misleading users, */
+/*     the inverse condition number is set to 0.0 when the estimated */
+/*     cwise error is at least CWISE_WRONG. */
+
+	cwise_wrong__ = sqrt(dlamch_("Epsilon"));
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < 
+		    cwise_wrong__) {
+		rcond_tmp__ = dla_gercond_(trans, n, &a[a_offset], lda, &af[
+			af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1],
+			 info, &work[1], &iwork[1]);
+	    } else {
+		rcond_tmp__ = 0.;
+	    }
+
+/*     Cap the error at 1.0. */
+
+	    if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 
+		    << 1)] > 1.) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+	    }
+
+/*     Threshold the error (see LAWN). */
+/*     PARAMS(3) is read only when the caller supplied it; its default is 1.0. */
+	    if (rcond_tmp__ < illrcond_thresh__) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 0.;
+		if ((*nparams < 3 || params[3] == 1.) && *info < *n + j) {
+		    *info = *n + j;
+		}
+	    } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < 
+		    err_lbnd__) {
+		err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__;
+		err_bnds_comp__[j + err_bnds_comp_dim1] = 1.;
+	    }
+
+/*     Save the condition number. */
+
+	    if (*n_err_bnds__ >= 3) {
+		err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DGERFSX */
+
+} /* dgerfsx_ */
+
diff --git a/lapack-netlib/SRC/dgerq2.c b/lapack-netlib/SRC/dgerq2.c
new file mode 100644
index 000000000..0044467b9
--- /dev/null
+++ b/lapack-netlib/SRC/dgerq2.c
@@ -0,0 +1,587 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* f2c complex struct -> C99 float complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* f2c doublecomplex struct -> C99 double complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage viewed as _Complex float; assumes identical layout -- TODO confirm */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same storage viewed as _Complex double; assumes identical layout -- TODO confirm */
+#define pCf(z) (*_pCf(z)) /* dereferenced _pCf(z); used as the assignment target by the macros below */
+#define pCd(z) (*_pCd(z)) /* dereferenced _pCd(z); used as the assignment target by the macros below */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: arguments of an external READ/WRITE (cierr, ciunit, ciend, cifmt, cirec) */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: arguments of an internal READ/WRITE; here the unit (iciunit) is a char pointer */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: arguments of an OPEN statement */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: arguments of a CLOSE statement */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: arguments shared by REWIND, BACKSPACE and ENDFILE */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: arguments of an INQUIRE statement */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* one member per scalar type declared above; for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j;  (member disabled: this header declares no longint typedef) */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* one variable entry of a Namelist (see struct Namelist) */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group holding nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* type-generic; takes the place of <stdlib.h> abs(); evaluates x twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* one argument is evaluated twice: pass side-effect-free expressions */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* one argument is evaluated twice: pass side-effect-free expressions */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): ccos() is the double variant; ccosf() would match the float operand */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+// c_sqrt assigns through pCf(R), like its siblings above, rather than through *(R).
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* NOTE(review): conj() is the double variant; conjf() would avoid the promotion */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* NOTE(review): cimag() is the double variant; cimagf() would avoid the promotion */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): *(B) reaches cpow() without the Cd() conversion applied to A -- confirm B's type */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) characters, stopping at a NUL in B; writes no padding */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)} /* NOTE(review): the braces make this expand to a block, not an expression */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): the braces make this expand to a block, not an expression */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): the braces make this expand to a block, so the result of dmaxloc_ is not usable */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* x**n by repeated squaring; n == 0 gives 1 */
+	float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (u != 0) {
+		if (u & 01) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) { /* x**n by repeated squaring; n == 0 gives 1 */
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (u != 0) {
+		if (u & 01) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* x**n by repeated squaring; n == 0 gives 1 */
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (u != 0) {
+		if (u & 01) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* x**n by repeated squaring; n == 0 gives 1 */
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	while (u != 0) {
+		if (u & 01) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* no squaring after the last bit, as before */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) { /* integer power x**n; for n < 0 the result is 1, -1 or 0 */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* n < 0: |x| >= 2 gives 0; NOTE(review): x == 0 evaluates 1/x, an integer division by zero */
+		else n = -n; /* x == -1: fall through to the loop with |n|; NOTE(review): overflows for the most negative n */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* second test is true only for x == -1 with n != 0 */
+		u = n;
+		for(pow = 1; ; ) { /* square-and-multiply over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used */
+	integer best = s, pos = s; double top = w[s-1];
+	while (++pos <= e)
+		if (w[pos-1] > top) { top = w[pos-1]; best = pos; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used */
+	integer best = s, pos = s; float top = w[s-1];
+	while (++pos <= e)
+		if (w[pos-1] > top) { top = w[pos-1]; best = pos; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards, starting (n-1)*|inc| elements in. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards, starting (n-1)*|inc| elements in. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards, starting (n-1)*|inc| elements in. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with strides incx and incy. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards, starting (n-1)*|inc| elements in. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief \b DGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked
algorithm. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGERQ2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgerq2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgerq2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgerq2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGERQ2 computes an RQ factorization of a real m by n matrix A: */
+/* > A = R * Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix A. */
+/* >          On exit, if m <= n, the upper triangle of the subarray */
+/* >          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */
+/* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
+/* >          contain the m by n upper trapezoidal matrix R; the remaining */
+/* >          elements, with the array TAU, represent the orthogonal matrix */
+/* >          Q as a product of elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (M) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
+/* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgerq2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* Column stride and base offset that let A(i,j) be read as a[i + j * a_dim1] */
+    integer a_dim1, a_offset;
+
+    /* Routines provided elsewhere in the library */
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *), dlarfg_(integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *, ftnlen);
+
+    /* Loop state and by-reference call arguments */
+    integer step, nrefl, row, col, bad_arg, vec_len, rows_above;
+    doublereal pivot;
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  Unblocked RQ factorization.  Reflectors are produced for step = k, k-1, ..., 1 */
+/*  (k = min(m,n)); each one is generated from row m-k+step of A and then applied */
+/*  from the right to the rows above it. */
+
+/*     Rebase the array arguments so they can be indexed from 1 */
+
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+/*     Validate M, N and LDA.  The tests run from the last argument to the */
+/*     first so that the lowest-numbered offending argument is the one reported. */
+
+    *info = 0;
+    if (*lda < f2cmax(1,*m)) *info = -4;
+    if (*n < 0) *info = -2;
+    if (*m < 0) *info = -1;
+    if (*info != 0) {
+	bad_arg = -(*info);
+	xerbla_("DGERQ2", &bad_arg, (ftnlen)6);
+	return 0;
+    }
+
+    nrefl = f2cmin(*m,*n);
+
+    for (step = nrefl; step >= 1; --step) {
+	row = *m - nrefl + step;
+	col = *n - nrefl + step;
+
+/*        Generate the reflector H(step) that annihilates A(row,1:col-1) */
+
+	vec_len = col;
+	dlarfg_(&vec_len, &a[row + col * a_dim1], &a[row + a_dim1], lda,
+		&tau[step]);
+
+/*        Apply H(step) to A(1:row-1,1:col) from the right; the pivot entry */
+/*        holds 1 during the call and is restored afterwards */
+
+	pivot = a[row + col * a_dim1];
+	a[row + col * a_dim1] = 1.;
+	rows_above = row - 1;
+	vec_len = col;
+	dlarf_("Right", &rows_above, &vec_len, &a[row + a_dim1], lda,
+		&tau[step], &a[a_offset], lda, &work[1]);
+	a[row + col * a_dim1] = pivot;
+    }
+    return 0;
+
+/*     End of DGERQ2 */
+} /* dgerq2_ */
+
diff --git a/lapack-netlib/SRC/dgerqf.c b/lapack-netlib/SRC/dgerqf.c
new file mode 100644
index 000000000..d0ec2af49
--- /dev/null
+++ b/lapack-netlib/SRC/dgerqf.c
@@ -0,0 +1,710 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 _Complex float */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 _Complex double */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed as _Complex float; pCf() dereferences it */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed as _Complex double; pCd() dereferences it */
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Value helpers are parenthesized expressions; the old brace-wrapped forms only compiled as initializers. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* Returns x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns the complex power x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns the complex power x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Integer power x**n; non-positive exponents are resolved before the squaring loop. */
+static integer pow_ii(integer x, integer n) {
+	integer pow = 1; unsigned long int u;
+	if (n <= 0) {
+		/* x**0 and 1**n are 1 */
+		if (n == 0 || x == 1) return 1;
+		/* negative n: 0 for any x other than -1, except that x == 0 still evaluates 1/x */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x is -1 here: (-1)**n equals (-1)**(-n) */
+	}
+	u = n;
+	do {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;
+	} while (u >>= 1);
+	return pow;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	double top = w[s-1]; integer best = s, pos;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > top) { top = w[pos-1]; best = pos; }
+	return best - s + 1;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	float top = w[s-1]; integer best = s, pos;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > top) { top = w[pos-1]; best = pos; }
+	return best - s + 1;
+}
+/* z := sum_i conjg(x(i)) * y(i) over n elements with strides incx and incy (single-precision complex). */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + conjg(x(i)) * y(i) */
+		sum += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = sum;
+}
+/* z := sum_i conjg(x(i)) * y(i) over n elements with strides incx and incy (double-precision complex). */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + conjg(x(i)) * y(i) */
+		sum += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = sum;
+}
+/* z := sum_i x(i) * y(i), unconjugated, over n elements with strides incx and incy (single-precision complex). */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + x(i) * y(i) */
+		sum += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = sum;
+}
+/* z := sum_i x(i) * y(i), unconjugated, over n elements with strides incx and incy (double-precision complex). */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + x(i) * y(i) */
+		sum += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = sum;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values: addressable copies of the integer literals that dgerqf_ */
+/* passes by pointer to ilaenv_. */
+static integer c__1 = 1;	/* first argument of the ilaenv_ call that yields nb */
+static integer c_n1 = -1;	/* passed for the last two integer arguments of every ilaenv_ call */
+static integer c__3 = 3;	/* first argument of the ilaenv_ call that yields nx */
+static integer c__2 = 2;	/* first argument of the ilaenv_ call that yields nbmin */
+
+/* > \brief \b DGERQF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGERQF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgerqf.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgerqf.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgerqf.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGERQF computes an RQ factorization of a real M-by-N matrix A: */
+/* > A = R * Q. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if m <= n, the upper triangle of the subarray */
+/* >          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */
+/* >          if m >= n, the elements on and above the (m-n)-th subdiagonal */
+/* >          contain the M-by-N upper trapezoidal matrix R; */
+/* >          the remaining elements, with the array TAU, represent the */
+/* >          orthogonal matrix Q as a product of min(m,n) elementary */
+/* >          reflectors (see Further Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] TAU */
+/* > \verbatim */
+/* >          TAU is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The scalar factors of the elementary reflectors (see Further */
+/* >          Details). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,M). */
+/* >          For optimum performance LWORK >= M*NB, where NB is */
+/* >          the optimal blocksize. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The matrix Q is represented as a product of elementary reflectors */
+/* > */
+/* >     Q = H(1) H(2) . . . H(k), where k = min(m,n). */
+/* > */
+/* >  Each H(i) has the form */
+/* > */
+/* >     H(i) = I - tau * v * v**T */
+/* > */
+/* >  where tau is a real scalar, and v is a real vector with */
+/* >  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */
+/* >  A(m-k+i,1:n-k+i-1), and tau in TAU(i). */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgerqf_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    integer i__, k, nbmin, iinfo;
+    extern /* Subroutine */ int dgerq2_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer ib, nb, ki, kk;
+    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
+	    integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer mu, nu, nx;
+    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork, lwkopt;
+    logical lquery;
+    integer iws;
+/*  Blocked RQ factorization driver: panels of up to nb rows are factored by dgerq2_, working from the */
+/*  bottom rows of A upwards; each panel's reflectors are applied to the rows above via dlarft_/dlarfb_. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.  -- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments: offset the pointers so that a[i + j*a_dim1], tau[i] and work[i] use 1-based indices */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+/*     With m, n, lda valid: store the optimal workspace size m*nb (1 if min(m,n) = 0) in work[1], then check lwork */
+    if (*info == 0) {
+	k = f2cmin(*m,*n);
+	if (k == 0) {
+	    lwkopt = 1;
+	} else {
+	    nb = ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
+		    (ftnlen)1);
+	    lwkopt = *m * nb;
+	}
+	work[1] = (doublereal) lwkopt;
+
+	if (*lwork < f2cmax(1,*m) && ! lquery) {
+	    *info = -7;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGERQF", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (k == 0) {
+	return 0;
+    }
+/*     Starting values for nbmin, nx and iws; they are refined below when nb > 1 and nb < k */
+    nbmin = 2;
+    nx = 1;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+
+/*        Determine when to cross over from blocked to unblocked code. */
+
+/* Computing MAX */
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGERQF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = f2cmax(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*              Not enough workspace to use optimal NB:  reduce NB and */
+/*              determine the minimum value of NB. */
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGERQF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = f2cmax(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially. */
+/*        The last kk rows are handled by the block method. */
+
+	ki = (k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = k, i__2 = ki + nb;
+	kk = f2cmin(i__1,i__2);
+/*        i runs from k-kk+ki+1 down to k-kk+1 in steps of nb */
+	i__1 = k - kk + 1;
+	i__2 = -nb;
+	for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ 
+		+= i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = f2cmin(i__3,nb);
+
+/*           Compute the RQ factorization of the current block */
+/*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */
+
+	    i__3 = *n - k + i__ + ib - 1;
+	    dgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+	    if (*m - k + i__ > 1) {
+
+/*              Form the triangular factor of the block reflector */
+/*              H = H(i+ib-1) . . . H(i+1) H(i) */
+
+		i__3 = *n - k + i__ + ib - 1;
+		dlarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + 
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */
+
+		i__3 = *m - k + i__ - 1;
+		i__4 = *n - k + i__ + ib - 1;
+		dlarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3,
+			 &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1],
+			 &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork);
+	    }
+/* L10: */
+	}
+	mu = *m - k + i__ + nb - 1;
+	nu = *n - k + i__ + nb - 1;
+    } else {
+	mu = *m;
+	nu = *n;
+    }
+
+/*     Use unblocked code to factor the last or only block */
+
+    if (mu > 0 && nu > 0) {
+	dgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    }
+/*     Return iws in work[1] */
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DGERQF */
+
+} /* dgerqf_ */
+
diff --git a/lapack-netlib/SRC/dgesc2.c b/lapack-netlib/SRC/dgesc2.c
new file mode 100644
index 000000000..a2a1aac42
--- /dev/null
+++ b/lapack-netlib/SRC/dgesc2.c
@@ -0,0 +1,604 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 _Complex float */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 _Complex double */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same storage viewed as _Complex float; pCf() dereferences it */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same storage viewed as _Complex double; pCd() dereferences it */
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+/* Value helpers are parenthesized expressions; the old brace-wrapped forms only compiled as initializers. */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* Returns x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns the complex power x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Returns the complex power x**n by binary exponentiation; a negative n yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double pow = 1.0;
+	/* Take |n| in unsigned arithmetic: "-n" overflows (undefined) when n is the most negative integer. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for ( ; u != 0; u >>= 1) {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return pow;
+}
+/* Integer power x**n; non-positive exponents are resolved before the squaring loop. */
+static integer pow_ii(integer x, integer n) {
+	integer pow = 1; unsigned long int u;
+	if (n <= 0) {
+		/* x**0 and 1**n are 1 */
+		if (n == 0 || x == 1) return 1;
+		/* negative n: 0 for any x other than -1, except that x == 0 still evaluates 1/x */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* x is -1 here: (-1)**n equals (-1)**(-n) */
+	}
+	u = n;
+	do {
+		if (u & 01) pow *= x;
+		if (u >> 1) x *= x;
+	} while (u >>= 1);
+	return pow;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	double top = w[s-1]; integer best = s, pos;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > top) { top = w[pos-1]; best = pos; }
+	return best - s + 1;
+}
+/* 1-based position, counted from s, of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	float top = w[s-1]; integer best = s, pos;
+	for (pos = s + 1; pos <= e; ++pos)
+		if (w[pos-1] > top) { top = w[pos-1]; best = pos; }
+	return best - s + 1;
+}
+/* z := sum_i conjg(x(i)) * y(i) over n elements with strides incx and incy (single-precision complex). */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + conjg(x(i)) * y(i) */
+		sum += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = sum;
+}
+/* z := sum_i conjg(x(i)) * y(i) over n elements with strides incx and incy (double-precision complex). */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + conjg(x(i)) * y(i) */
+		sum += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = sum;
+}
+/* z := sum_i x(i) * y(i), unconjugated, over n elements with strides incx and incy (single-precision complex). */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + x(i) * y(i) */
+		sum += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = sum;
+}
+/* z := sum_i x(i) * y(i), unconjugated, over n elements with strides incx and incy (double-precision complex). */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in reference BLAS, a negative stride walks its vector backwards from element (n-1)*|inc|, */
+	/* so start there instead of indexing before the beginning of the array. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double sum = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		/* sum = sum + x(i) * y(i) */
+		sum += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = sum;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+
+/* > \brief \b DGESC2 solves a system of linear equations using the LU factorization with complete pivoting computed by dgetc2. */
+mputed by sgetc2. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESC2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesc2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesc2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesc2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */
+
+/*       INTEGER            LDA, N */
+/*       DOUBLE PRECISION   SCALE */
+/*       INTEGER            IPIV( * ), JPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), RHS( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGESC2 solves a system of linear equations */
+/* > */
+/* >           A * X = scale* RHS */
+/* > */
+/* > with a general N-by-N matrix A using the LU factorization with */
+/* > complete pivoting computed by DGETC2. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the  LU part of the factorization of the n-by-n */
+/* >          matrix A computed by DGETC2:  A = P * L * U * Q */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1, N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] RHS */
+/* > \verbatim */
+/* >          RHS is DOUBLE PRECISION array, dimension (N). */
+/* >          On entry, the right hand side vector b. */
+/* >          On exit, the solution vector X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N). */
+/* >          The pivot indices; for 1 <= i <= N, row i of the */
+/* >          matrix has been interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JPIV */
+/* > \verbatim */
+/* >          JPIV is INTEGER array, dimension (N). */
+/* >          The pivot indices; for 1 <= j <= N, column j of the */
+/* >          matrix has been interchanged with column JPIV(j). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SCALE */
+/* > \verbatim */
+/* >          SCALE is DOUBLE PRECISION */
+/* >          On exit, SCALE contains the scale factor. SCALE is chosen */
+/* >          0 <= SCALE <= 1 to prevent overflow in the solution. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2017 */
+
+/* > \ingroup doubleGEauxiliary */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
+/* >     Umea University, S-901 87 Umea, Sweden. */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgesc2_(integer *n, doublereal *a, integer *lda, 
+	doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal temp;
+    integer i__, j;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *), dlabad_(doublereal *, doublereal *);
+    extern doublereal dlamch_(char *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    doublereal bignum;
+    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, 
+	    integer *, integer *, integer *, integer *);
+    doublereal smlnum, eps;
+
+
+/*  -- LAPACK auxiliary routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     November 2017 */
+/*  Steps: permute RHS by IPIV, eliminate with L, rescale RHS when */
+/*  2*smlnum*|rhs(i)| exceeds |a(n,n)|, substitute with U, permute by JPIV. */
+/*  ===================================================================== */
+/*  The function always returns 0.  NOTE(review): there is no quick return for N <= 0. */
+
+/*      Set constant to control overflow */
+
+    /* Parameter adjustments: rebase the pointers so the 1-based Fortran subscripts used below address the caller's arrays */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --rhs;
+    --ipiv;
+    --jpiv;
+
+    /* Function Body */
+    eps = dlamch_("P");
+    smlnum = dlamch_("S") / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+/*     (bignum is only handed to dlabad_; the code below reads smlnum alone) */
+/*     Apply permutations IPIV to RHS */
+/*     (dlaswp_ arguments here: 1 column, pivot range 1 .. N-1, increment +1 -- per the LAPACK DLASWP interface) */
+    i__1 = *n - 1;
+    dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1);
+
+/*     Solve for L part */
+/*     (forward elimination; the diagonal of L is never read, i.e. it is treated as unit) */
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = *n;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    rhs[j] -= a[j + i__ * a_dim1] * rhs[i__];
+/* L10: */
+	}
+/* L20: */
+    }
+
+/*     Solve for U part */
+
+    *scale = 1.;
+
+/*     Check for scaling */
+/*     (i__ is the idamax_ result, presumably the index of the largest |rhs| entry; when the test fires, RHS and *scale are both multiplied by 0.5/|rhs(i__)|) */
+    i__ = idamax_(n, &rhs[1], &c__1);
+    if (smlnum * 2. * (d__1 = rhs[i__], abs(d__1)) > (d__2 = a[*n + *n * 
+	    a_dim1], abs(d__2))) {
+	temp = .5 / (d__1 = rhs[i__], abs(d__1));
+	dscal_(n, &temp, &rhs[1], &c__1);
+	*scale *= temp;
+    }
+/*     Back substitution, last row first: scale rhs(i) by 1/a(i,i), then subtract rhs(j)*a(i,j)/a(i,i) for every j > i */
+    for (i__ = *n; i__ >= 1; --i__) {
+	temp = 1. / a[i__ + i__ * a_dim1];
+	rhs[i__] *= temp;
+	i__1 = *n;
+	for (j = i__ + 1; j <= i__1; ++j) {
+	    rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp);
+/* L30: */
+	}
+/* L40: */
+    }
+
+/*     Apply permutations JPIV to the solution (RHS) */
+/*     (same dlaswp_ call shape as for IPIV, but with increment -1) */
+    i__1 = *n - 1;
+    dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1);
+    return 0;
+
+/*     End of DGESC2 */
+
+} /* dgesc2_ */
+
diff --git a/lapack-netlib/SRC/dgesdd.c b/lapack-netlib/SRC/dgesdd.c
new file mode 100644
index 000000000..f831c97b0
--- /dev/null
+++ b/lapack-netlib/SRC/dgesdd.c
@@ -0,0 +1,2167 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* Fortran INTEGER as used by the translated routines */
+typedef unsigned int uinteger;	/* unsigned counterpart; used by the bit_clear/bit_set macros */
+typedef char *address;
+typedef short int shortint;
+typedef float real;	/* single precision */
+typedef double doublereal;	/* double precision */
+typedef struct { real r, i; } complex;	/* single-precision complex stored as a (r, i) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex stored as a (r, i) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast only; assumes the struct and _Complex float share a layout */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast only; assumes the struct and _Complex double share a layout */
+#define pCf(z) (*_pCf(z))	/* lvalue view of a complex struct, used as an assignment target */
+#define pCd(z) (*_pCd(z))	/* lvalue view of a doublecomplex struct, used as an assignment target */
+typedef int logical;	/* holds TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+/* Truth values stored in a logical. */
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain extern unless defined beforehand. */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: descriptor structs carried over from f2c.h.  flag, ftnlen and ftnint are all plain int. */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; most string results are a char* buffer paired with a length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* declared ftnint where the sibling length fields use ftnlen; both are int */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+/* Multitype overlays one member of each f2c scalar type: integer1, shortint, integer, real, doublereal, complex, doublecomplex. */
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable (name, address, dims, type code) */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of Vardesc pointers; presumably vars holds nvars entries */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* type-generic; evaluates x twice and replaces abs() from <stdlib.h> from here on */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* evaluates the selected argument twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* evaluates the selected argument twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Below: macro replacements for libf2c helpers.  The x, a, b, R, Z style arguments are pointers and are dereferenced by the macro. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): calls the double-precision ccos where its neighbours use the f-suffixed functions */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+// (an earlier c_sqrt variant stored through *(R) directly; the definition below assigns through pCf(R) instead)
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* uses conj (double complex); the result is converted back on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* uses cimag, so the value has type double */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* rounds toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* same expansion as d_int */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* natural log times a fixed constant */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* same expansion as d_lg10 */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves rounded away from zero; takes a value, not a pointer */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| carrying the sign of b; b == 0 counts as non-negative */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) characters and stops at a NUL in B; writes no padding or terminator */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* statement macro: leaves the enclosing loop or switch */
+#define mycycle() continue;	/* statement macro: jumps to the next iteration of the enclosing loop */
+#define myceiling(w) (ceil(w))	/* parenthesised expression; the former braced form only worked as an initializer */
+#define myhuge(w) (HUGE_VAL)	/* the argument is ignored; yields HUGE_VAL from <math.h> */
+/* mymaxloc forwards to dmaxloc_ whatever the element type; s and e are pointers and are dereferenced, n is passed through. */
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))	/* expression form, so the position returned by dmaxloc_ can be assigned */
+
+/* procedure parameter types for -A and -C++ */
+/* L_fp: pointer to a function returning logical; the parameter list is variadic under C++ and left unspecified in C. */
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x**n for an integer exponent (1 when n == 0) by repeated squaring */
+	float pow=1.0; unsigned long int u; /* the local 'pow' hides pow() from <math.h> inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use |n| on the reciprocal; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* low bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square the base for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* returns x**n for an integer exponent (1 when n == 0) by repeated squaring */
+	double pow=1.0; unsigned long int u; /* the local 'pow' hides pow() from <math.h> inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use |n| on the reciprocal; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* low bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square the base for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* complex x**n for an integer exponent (1 when n == 0) by repeated squaring */
+	_Complex float pow=1.0; unsigned long int u; /* the local 'pow' hides pow() from <math.h> inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use |n| on the complex reciprocal; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* low bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square the base for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* complex x**n for an integer exponent (1 when n == 0) by repeated squaring */
+	_Complex double pow=1.0; unsigned long int u; /* the local 'pow' hides pow() from <math.h> inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use |n| on the complex reciprocal; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* low bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square the base for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* integer x**n; non-positive exponents are resolved by the special cases below */
+	integer pow; unsigned long int u; /* the local 'pow' hides pow() from <math.h> inside this function */
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* x**0 and 1**n are 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* n < 0: any other nonzero x gives 0; NOTE(review): x == 0 evaluates 1/x, an integer division by zero */
+		else n = -n; /* x == -1: only the parity of |n| matters, so fall into the loop with |n| */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* taken for a positive exponent, including the one just negated */
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x; /* low bit set: fold the current power of x into the result */
+			if(u >>= 1) x *= x; /* bits remain: square the base for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the first largest value among w(s..e) (1-based), counted from s; n is not used */
+{
+	double m; integer i, mi; /* m = largest value so far, mi = its 1-based index */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* w(s) is read unconditionally, even when e < s */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the earliest of equal maxima */
+	return mi-s+1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* position of the first largest value among w(s..e) (1-based), counted from s; n is not used */
+{
+	float m; integer i, mi; /* m = largest value so far, mi = its 1-based index */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* w(s) is read unconditionally, even when e < s */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the earliest of equal maxima */
+	return mi-s+1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum over i of conjg(x(i)) * y(i); n, incx, incy are read through their pointers */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* running sum */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* unit strides: zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* reference-BLAS rule: a vector with a negative increment is walked backwards, starting at 0-based offset (1-n)*inc */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++,ix+=incx,iy+=incy) zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		/* for non-negative increments ix == i*incx and iy == i*incy, i.e. the same elements in the same order as before */
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum over i of dconjg(x(i)) * y(i); n, incx, incy are read through their pointers */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* running sum */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* unit strides: zdotc = zdotc + dconjg(x(i)) * y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* reference-BLAS rule: a vector with a negative increment is walked backwards, starting at 0-based offset (1-n)*inc */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++,ix+=incx,iy+=incy) zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		/* for non-negative increments ix == i*incx and iy == i*incy, i.e. the same elements in the same order as before */
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum over i of x(i) * y(i), no conjugation; n, incx, incy are read through their pointers */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* running sum; the variable name is historical, nothing is conjugated here */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* unit strides: sum = sum + x(i) * y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* reference-BLAS rule: a vector with a negative increment is walked backwards, starting at 0-based offset (1-n)*inc */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++,ix+=incx,iy+=incy) zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		/* for non-negative increments ix == i*incx and iy == i*incy, i.e. the same elements in the same order as before */
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum over i of x(i) * y(i), no conjugation; n, incx, incy are read through their pointers */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* running sum; the variable name is historical, nothing is conjugated here */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* unit strides: sum = sum + x(i) * y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* reference-BLAS rule: a vector with a negative increment is walked backwards, starting at 0-based offset (1-n)*inc */
+		integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+		for (i=0;i<n;i++,ix+=incx,iy+=incy) zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		/* for non-negative increments ix == i*incx and iy == i*incy, i.e. the same elements in the same order as before */
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;	/* -1 by address; given to the dgebrd_/dgeqrf_/dorgbr_/dorgqr_/dormbr_ sizing calls in dgesdd_, in what appears to be their LWORK slot */
+static integer c__0 = 0;	/* 0 by address */
+static doublereal c_b63 = 0.;	/* 0.0 by address */
+static integer c__1 = 1;	/* 1 by address */
+static doublereal c_b84 = 1.;	/* 1.0 by address */
+
+/* > \brief \b DGESDD */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESDD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesdd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesdd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesdd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, */
+/*                          WORK, LWORK, IWORK, INFO ) */
+
+/*       CHARACTER          JOBZ */
+/*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N */
+/*       INTEGER            IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ), */
+/*      $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGESDD computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, optionally computing the left and right singular */
+/* > vectors.  If singular vectors are desired, it uses a */
+/* > divide-and-conquer algorithm. */
+/* > */
+/* > The SVD is written */
+/* > */
+/* >      A = U * SIGMA * transpose(V) */
+/* > */
+/* > where SIGMA is an M-by-N matrix which is zero except for its */
+/* > min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
+/* > V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA */
+/* > are the singular values of A; they are real and non-negative, and */
+/* > are returned in descending order.  The first min(m,n) columns of */
+/* > U and V are the left and right singular vectors of A. */
+/* > */
+/* > Note that the routine returns VT = V**T, not V. */
+/* > */
+/* > The divide and conquer algorithm makes very mild assumptions about */
+/* > floating point arithmetic. It will work on machines with a guard */
+/* > digit in add/subtract, or on those binary machines without guard */
+/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
+/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */
+/* > without guard digits, but we know of none. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBZ */
+/* > \verbatim */
+/* >          JOBZ is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix U: */
+/* >          = 'A':  all M columns of U and all N rows of V**T are */
+/* >                  returned in the arrays U and VT; */
+/* >          = 'S':  the first min(M,N) columns of U and the first */
+/* >                  min(M,N) rows of V**T are returned in the arrays U */
+/* >                  and VT; */
+/* >          = 'O':  If M >= N, the first N columns of U are overwritten */
+/* >                  on the array A and all rows of V**T are returned in */
+/* >                  the array VT; */
+/* >                  otherwise, all columns of U are returned in the */
+/* >                  array U and the first M rows of V**T are overwritten */
+/* >                  in the array A; */
+/* >          = 'N':  no columns of U or rows of V**T are computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if JOBZ = 'O',  A is overwritten with the first N columns */
+/* >                          of U (the left singular vectors, stored */
+/* >                          columnwise) if M >= N; */
+/* >                          A is overwritten with the first M rows */
+/* >                          of V**T (the right singular vectors, stored */
+/* >                          rowwise) otherwise. */
+/* >          if JOBZ .ne. 'O', the contents of A are destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The singular values of A, sorted so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is DOUBLE PRECISION array, dimension (LDU,UCOL) */
+/* >          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; */
+/* >          UCOL = min(M,N) if JOBZ = 'S'. */
+/* >          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M */
+/* >          orthogonal matrix U; */
+/* >          if JOBZ = 'S', U contains the first min(M,N) columns of U */
+/* >          (the left singular vectors, stored columnwise); */
+/* >          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= 1; if */
+/* >          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VT */
+/* > \verbatim */
+/* >          VT is DOUBLE PRECISION array, dimension (LDVT,N) */
+/* >          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the */
+/* >          N-by-N orthogonal matrix V**T; */
+/* >          if JOBZ = 'S', VT contains the first min(M,N) rows of */
+/* >          V**T (the right singular vectors, stored rowwise); */
+/* >          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT.  LDVT >= 1; */
+/* >          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; */
+/* >          if JOBZ = 'S', LDVT >= min(M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. LWORK >= 1. */
+/* >          If LWORK = -1, a workspace query is assumed.  The optimal */
+/* >          size for the WORK array is calculated and stored in WORK(1), */
+/* >          and no other work except argument checking is performed. */
+/* > */
+/* >          Let mx = max(M,N) and mn = min(M,N). */
+/* >          If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ). */
+/* >          If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ). */
+/* >          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn. */
+/* >          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx. */
+/* >          These are not tight minimums in all cases; see comments inside code. */
+/* >          For good performance, LWORK should generally be larger; */
+/* >          a query is recommended. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (8*min(M,N)) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  DBDSDC did not converge, updating process failed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleGEsing */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Ming Gu and Huan Ren, Computer Science Division, University of */
+/* >     California at Berkeley, USA */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal *
+	a, integer *lda, doublereal *s, doublereal *u, integer *ldu, 
+	doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, 
+	integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
+	    i__2, i__3;
+
+    /* Local variables */
+    integer lwork_dorglq_mn__, lwork_dorglq_nn__, lwork_dorgqr_mm__, 
+	    lwork_dorgqr_mn__, iscl;
+    doublereal anrm;
+    integer idum[1], ierr, itau, lwork_dormbr_qln_mm__, lwork_dormbr_qln_mn__,
+	     lwork_dormbr_qln_nn__, lwork_dormbr_prt_mm__, 
+	    lwork_dormbr_prt_mn__, lwork_dormbr_prt_nn__, i__;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    integer chunk, minmn, wrkbl, itaup, itauq, mnthr;
+    logical wntqa;
+    integer nwork;
+    logical wntqn, wntqo, wntqs;
+    integer ie, lwork_dorgbr_p_mm__;
+    extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal 
+	    *, doublereal *, doublereal *, integer *, doublereal *, integer *,
+	     doublereal *, integer *, doublereal *, integer *, integer *);
+    integer il, lwork_dorgbr_q_nn__;
+    extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *);
+    integer ir, bdspac;
+    extern doublereal dlange_(char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *);
+    integer iu;
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    integer *, integer *, doublereal *, integer *, integer *),
+	     dgeqrf_(integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *), dlacpy_(char *,
+	     integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), dlaset_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen), dorgbr_(char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, integer *);
+    extern logical disnan_(doublereal *);
+    doublereal bignum;
+    extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dorgqr_(integer *, integer *, integer *, doublereal *,
+	     integer *, doublereal *, doublereal *, integer *, integer *);
+    integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt;
+    doublereal smlnum;
+    logical wntqas, lquery;
+    integer blk;
+    doublereal dum[1], eps;
+    integer ivt, lwork_dgebrd_mm__, lwork_dgebrd_mn__, lwork_dgebrd_nn__, 
+	    lwork_dgelqf_mn__, lwork_dgeqrf_mn__;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    wntqa = lsame_(jobz, "A");
+    wntqs = lsame_(jobz, "S");
+    wntqas = wntqa || wntqs;
+    wntqo = lsame_(jobz, "O");
+    wntqn = lsame_(jobz, "N");
+    lquery = *lwork == -1;
+
+    if (! (wntqa || wntqs || wntqo || wntqn)) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -5;
+    } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < *
+	    m) {
+	*info = -8;
+    } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || 
+	    wntqo && *m >= *n && *ldvt < *n) {
+	*info = -10;
+    }
+
+/*     Compute workspace */
+/*       Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace allocated at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV. */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	bdspac = 0;
+	mnthr = (integer) (minmn * 11. / 6.);
+	if (*m >= *n && minmn > 0) {
+
+/*           Compute space needed for DBDSDC */
+
+	    if (wntqn) {
+/*              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) */
+/*              keep 7*N for backwards compatibility. */
+		bdspac = *n * 7;
+	    } else {
+		bdspac = *n * 3 * *n + (*n << 2);
+	    }
+
+/*           Compute space preferred for each routine */
+	    dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr);
+	    lwork_dgebrd_mn__ = (integer) dum[0];
+
+	    dgebrd_(n, n, dum, n, dum, dum, dum, dum, dum, &c_n1, &ierr);
+	    lwork_dgebrd_nn__ = (integer) dum[0];
+
+	    dgeqrf_(m, n, dum, m, dum, dum, &c_n1, &ierr);
+	    lwork_dgeqrf_mn__ = (integer) dum[0];
+
+	    dorgbr_("Q", n, n, n, dum, n, dum, dum, &c_n1, &ierr);
+	    lwork_dorgbr_q_nn__ = (integer) dum[0];
+
+	    dorgqr_(m, m, n, dum, m, dum, dum, &c_n1, &ierr);
+	    lwork_dorgqr_mm__ = (integer) dum[0];
+
+	    dorgqr_(m, n, n, dum, m, dum, dum, &c_n1, &ierr);
+	    lwork_dorgqr_mn__ = (integer) dum[0];
+
+	    dormbr_("P", "R", "T", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_prt_nn__ = (integer) dum[0];
+
+	    dormbr_("Q", "L", "N", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_qln_nn__ = (integer) dum[0];
+
+	    dormbr_("Q", "L", "N", m, n, n, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_qln_mn__ = (integer) dum[0];
+
+	    dormbr_("Q", "L", "N", m, m, n, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_qln_mm__ = (integer) dum[0];
+
+	    if (*m >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1 (M >> N, JOBZ='N') */
+
+		    wrkbl = *n + lwork_dgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = bdspac + *n;
+		} else if (wntqo) {
+
+/*                 Path 2 (M >> N, JOBZ='O') */
+
+		    wrkbl = *n + lwork_dgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + (*n << 1) * *n;
+		    minwrk = bdspac + (*n << 1) * *n + *n * 3;
+		} else if (wntqs) {
+
+/*                 Path 3 (M >> N, JOBZ='S') */
+
+		    wrkbl = *n + lwork_dgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+		    minwrk = bdspac + *n * *n + *n * 3;
+		} else if (wntqa) {
+
+/*                 Path 4 (M >> N, JOBZ='A') */
+
+		    wrkbl = *n + lwork_dgeqrf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+/* Computing MAX */
+		    i__1 = *n * 3 + bdspac, i__2 = *n + *m;
+		    minwrk = *n * *n + f2cmax(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 5 (M >= N, but not much larger) */
+
+		wrkbl = *n * 3 + lwork_dgebrd_mn__;
+		if (wntqn) {
+/*                 Path 5n (M >= N, jobz='N') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *n * 3 + f2cmax(*m,bdspac);
+		} else if (wntqo) {
+/*                 Path 5o (M >= N, jobz='O') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *m, i__2 = *n * *n + bdspac;
+		    minwrk = *n * 3 + f2cmax(i__1,i__2);
+		} else if (wntqs) {
+/*                 Path 5s (M >= N, jobz='S') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *n * 3 + f2cmax(*m,bdspac);
+		} else if (wntqa) {
+/*                 Path 5a (M >= N, jobz='A') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *n * 3 + f2cmax(*m,bdspac);
+		}
+	    }
+	} else if (minmn > 0) {
+
+/*           Compute space needed for DBDSDC */
+
+	    if (wntqn) {
+/*              dbdsdc needs only 4*M (or 6*M for uplo=L for LAPACK <= 3.6) */
+/*              keep 7*M for backwards compatibility. */
+		bdspac = *m * 7;
+	    } else {
+		bdspac = *m * 3 * *m + (*m << 2);
+	    }
+
+/*           Compute space preferred for each routine */
+	    dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr);
+	    lwork_dgebrd_mn__ = (integer) dum[0];
+
+	    dgebrd_(m, m, &a[a_offset], m, &s[1], dum, dum, dum, dum, &c_n1, &
+		    ierr);
+	    lwork_dgebrd_mm__ = (integer) dum[0];
+
+	    dgelqf_(m, n, &a[a_offset], m, dum, dum, &c_n1, &ierr);
+	    lwork_dgelqf_mn__ = (integer) dum[0];
+
+	    dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr);
+	    lwork_dorglq_nn__ = (integer) dum[0];
+
+	    dorglq_(m, n, m, &a[a_offset], m, dum, dum, &c_n1, &ierr);
+	    lwork_dorglq_mn__ = (integer) dum[0];
+
+	    dorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
+	    lwork_dorgbr_p_mm__ = (integer) dum[0];
+
+	    dormbr_("P", "R", "T", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_prt_mm__ = (integer) dum[0];
+
+	    dormbr_("P", "R", "T", m, n, m, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_prt_mn__ = (integer) dum[0];
+
+	    dormbr_("P", "R", "T", n, n, m, dum, n, dum, dum, n, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_prt_nn__ = (integer) dum[0];
+
+	    dormbr_("Q", "L", "N", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &
+		    ierr);
+	    lwork_dormbr_qln_mm__ = (integer) dum[0];
+
+	    if (*n >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1t (N >> M, JOBZ='N') */
+
+		    wrkbl = *m + lwork_dgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = bdspac + *m;
+		} else if (wntqo) {
+
+/*                 Path 2t (N >> M, JOBZ='O') */
+
+		    wrkbl = *m + lwork_dgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + (*m << 1) * *m;
+		    minwrk = bdspac + (*m << 1) * *m + *m * 3;
+		} else if (wntqs) {
+
+/*                 Path 3t (N >> M, JOBZ='S') */
+
+		    wrkbl = *m + lwork_dgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+		    minwrk = bdspac + *m * *m + *m * 3;
+		} else if (wntqa) {
+
+/*                 Path 4t (N >> M, JOBZ='A') */
+
+		    wrkbl = *m + lwork_dgelqf_mn__;
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + lwork_dorglq_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+/* Computing MAX */
+		    i__1 = *m * 3 + bdspac, i__2 = *m + *n;
+		    minwrk = *m * *m + f2cmax(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 5t (N > M, but not much larger) */
+
+		wrkbl = *m * 3 + lwork_dgebrd_mn__;
+		if (wntqn) {
+/*                 Path 5tn (N > M, jobz='N') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *m * 3 + f2cmax(*n,bdspac);
+		} else if (wntqo) {
+/*                 Path 5to (N > M, jobz='O') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    wrkbl = f2cmax(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *n, i__2 = *m * *m + bdspac;
+		    minwrk = *m * 3 + f2cmax(i__1,i__2);
+		} else if (wntqs) {
+/*                 Path 5ts (N > M, jobz='S') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *m * 3 + f2cmax(*n,bdspac);
+		} else if (wntqa) {
+/*                 Path 5ta (N > M, jobz='A') */
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_nn__;
+		    wrkbl = f2cmax(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + bdspac;
+		    maxwrk = f2cmax(i__1,i__2);
+		    minwrk = *m * 3 + f2cmax(*n,bdspac);
+		}
+	    }
+	}
+	maxwrk = f2cmax(maxwrk,minwrk);
+	work[1] = (doublereal) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGESDD", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("P");
+    smlnum = sqrt(dlamch_("S")) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, dum);
+    if (disnan_(&anrm)) {
+	*info = -4;
+	return 0;
+    }
+    iscl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*        A has at least as many rows as columns. If A has sufficiently */
+/*        more rows than columns, first reduce using the QR */
+/*        decomposition (if sufficient workspace available) */
+
+	if (*m >= mnthr) {
+
+	    if (wntqn) {
+
+/*              Path 1 (M >> N, JOBZ='N') */
+/*              No singular vectors to be computed */
+
+		itau = 1;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              Workspace: need   N [tau] + N    [work] */
+/*              Workspace: prefer N [tau] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out below R */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		dlaset_("L", &i__1, &i__2, &c_b63, &c_b63, &a[a_dim1 + 2], 
+			lda);
+		ie = 1;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in A */
+/*              Workspace: need   3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *n;
+
+/*              Perform bidiagonal SVD, computing singular values only */
+/*              Workspace: need   N [e] + BDSPAC */
+
+		dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*              Path 2 (M >> N, JOBZ = 'O') */
+/*              N left singular vectors to be overwritten on A and */
+/*              N right singular vectors to be computed in VT */
+
+		ir = 1;
+
+/*              WORK(IR) is LDWRKR by N */
+
+		if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) {
+		    ldwrkr = *lda;
+		} else {
+		    ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n;
+		}
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		dlaset_("L", &i__1, &i__2, &c_b63, &c_b63, &work[ir + 1], &
+			ldwrkr);
+
+/*              Generate Q in A */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in WORK(IR) */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*              WORK(IU) is N by N */
+
+		iu = nwork;
+		nwork = iu + *n * *n;
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in WORK(IU) and computing right */
+/*              singular vectors of bidiagonal matrix in VT */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC */
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite WORK(IU) by left singular vectors of R */
+/*              and VT by right singular vectors of R */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N    [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &work[iu], n, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*              Multiply Q in A by left singular vectors of R in */
+/*              WORK(IU), storing result in WORK(IR) and copying to A */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] */
+/*              Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U] */
+
+		i__1 = *m;
+		i__2 = ldwrkr;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
+			i__2) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = f2cmin(i__3,ldwrkr);
+		    dgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], 
+			    lda, &work[iu], n, &c_b63, &work[ir], &ldwrkr);
+		    dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + 
+			    a_dim1], lda);
+/* L10: */
+		}
+
+	    } else if (wntqs) {
+
+/*              Path 3 (M >> N, JOBZ='S') */
+/*              N left singular vectors to be computed in U and */
+/*              N right singular vectors to be computed in VT */
+
+		ir = 1;
+
+/*              WORK(IR) is N by N */
+
+		ldwrkr = *n;
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		dlaset_("L", &i__2, &i__1, &c_b63, &c_b63, &work[ir + 1], &
+			ldwrkr);
+
+/*              Generate Q in A */
+/*              Workspace: need   N*N [R] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [R] + N [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in WORK(IR) */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + BDSPAC */
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of R and VT */
+/*              by right singular vectors of R */
+/*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply Q in A by left singular vectors of R in */
+/*              WORK(IR), storing result in U */
+/*              Workspace: need   N*N [R] */
+
+		dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr);
+		dgemm_("N", "N", m, n, n, &c_b84, &a[a_offset], lda, &work[ir]
+			, &ldwrkr, &c_b63, &u[u_offset], ldu);
+
+	    } else if (wntqa) {
+
+/*              Path 4 (M >> N, JOBZ='A') */
+/*              M left singular vectors to be computed in U and */
+/*              N right singular vectors to be computed in VT */
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		itau = iu + ldwrku * *n;
+		nwork = itau + *n;
+
+/*              Compute A=Q*R, copying result to U */
+/*              Workspace: need   N*N [U] + N [tau] + N    [work] */
+/*              Workspace: prefer N*N [U] + N [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*              Generate Q in U */
+/*              Workspace: need   N*N [U] + N [tau] + M    [work] */
+/*              Workspace: prefer N*N [U] + N [tau] + M*NB [work] */
+		i__2 = *lwork - nwork + 1;
+		dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+
+/*              Produce R in A, zeroing out other entries */
+
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		dlaset_("L", &i__2, &i__1, &c_b63, &c_b63, &a[a_dim1 + 2], 
+			lda);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*              Bidiagonalize R in A */
+/*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N      [work] */
+/*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in WORK(IU) and computing right */
+/*              singular vectors of bidiagonal matrix in VT */
+/*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + BDSPAC */
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite WORK(IU) by left singular vectors of R and VT */
+/*              by right singular vectors of R */
+/*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			ierr);
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply Q in U by left singular vectors of R in */
+/*              WORK(IU), storing result in A */
+/*              Workspace: need   N*N [U] */
+
+		dgemm_("N", "N", m, n, n, &c_b84, &u[u_offset], ldu, &work[iu]
+			, &ldwrku, &c_b63, &a[a_offset], lda);
+
+/*              Copy left singular vectors of A from A to U */
+
+		dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+	    }
+
+	} else {
+
+/*           M .LT. MNTHR */
+
+/*           Path 5 (M >= N, but not much larger) */
+/*           Reduce to bidiagonal form without QR decomposition */
+
+	    ie = 1;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*           Bidiagonalize A */
+/*           Workspace: need   3*N [e, tauq, taup] + M        [work] */
+/*           Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work] */
+
+	    i__2 = *lwork - nwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*              Path 5n (M >= N, JOBZ='N') */
+/*              Perform bidiagonal SVD, only computing singular values */
+/*              Workspace: need   3*N [e, tauq, taup] + BDSPAC */
+
+		dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+/*              Path 5o (M >= N, JOBZ='O') */
+		iu = nwork;
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		    nwork = iu + ldwrku * *n;
+		    dlaset_("F", m, n, &c_b63, &c_b63, &work[iu], &ldwrku);
+/*                 IR is unused; silence compile warnings */
+		    ir = -1;
+		} else {
+
+/*                 WORK( IU ) is N by N */
+
+		    ldwrku = *n;
+		    nwork = iu + ldwrku * *n;
+
+/*                 WORK(IR) is LDWRKR by N */
+
+		    ir = nwork;
+		    ldwrkr = (*lwork - *n * *n - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in WORK(IU) and computing right */
+/*              singular vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + BDSPAC */
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, &
+			vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[
+			1], info);
+
+/*              Overwrite VT by right singular vectors of A */
+/*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*                 Path 5o-fast */
+/*                 Overwrite WORK(IU) by left singular vectors of A */
+/*                 Workspace: need   3*N [e, tauq, taup] + M*N [U] + N    [work] */
+/*                 Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			    itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			    ierr);
+
+/*                 Copy left singular vectors of A from WORK(IU) to A */
+
+		    dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda);
+		} else {
+
+/*                 Path 5o-slow */
+/*                 Generate Q in A */
+/*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work] */
+/*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[nwork], &i__2, &ierr);
+
+/*                 Multiply Q in A by left singular vectors of */
+/*                 bidiagonal matrix in WORK(IU), storing result in */
+/*                 WORK(IR) and copying to A */
+/*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + NB*N [R] */
+/*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N  [R] */
+
+		    i__2 = *m;
+		    i__1 = ldwrkr;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *m - i__ + 1;
+			chunk = f2cmin(i__3,ldwrkr);
+			dgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + 
+				a_dim1], lda, &work[iu], &ldwrku, &c_b63, &
+				work[ir], &ldwrkr);
+			dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + 
+				a_dim1], lda);
+/* L20: */
+		    }
+		}
+
+	    } else if (wntqs) {
+
+/*              Path 5s (M >= N, JOBZ='S') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*N [e, tauq, taup] + BDSPAC */
+
+		dlaset_("F", m, n, &c_b63, &c_b63, &u[u_offset], ldu);
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*N [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*              Path 5a (M >= N, JOBZ='A') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*N [e, tauq, taup] + BDSPAC */
+
+		dlaset_("F", m, m, &c_b63, &c_b63, &u[u_offset], ldu);
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Set the right corner of U to identity matrix */
+
+		if (*m > *n) {
+		    i__1 = *m - *n;
+		    i__2 = *m - *n;
+		    dlaset_("F", &i__1, &i__2, &c_b63, &c_b84, &u[*n + 1 + (*
+			    n + 1) * u_dim1], ldu);
+		}
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*N [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer 3*N [e, tauq, taup] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    } else {
+
+/*        A has more columns than rows. If A has sufficiently more */
+/*        columns than rows, first reduce using the LQ decomposition (if */
+/*        sufficient workspace available) */
+
+	if (*n >= mnthr) {
+
+	    if (wntqn) {
+
+/*              Path 1t (N >> M, JOBZ='N') */
+/*              No singular vectors to be computed */
+
+		itau = 1;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              Workspace: need   M [tau] + M [work] */
+/*              Workspace: prefer M [tau] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out above L */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		dlaset_("U", &i__1, &i__2, &c_b63, &c_b63, &a[(a_dim1 << 1) + 
+			1], lda);
+		ie = 1;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in A */
+/*              Workspace: need   3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *m;
+
+/*              Perform bidiagonal SVD, computing singular values only */
+/*              Workspace: need   M [e] + BDSPAC */
+
+		dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*              Path 2t (N >> M, JOBZ='O') */
+/*              M right singular vectors to be overwritten on A and */
+/*              M left singular vectors to be computed in U */
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+/*              WORK(IL)  is M by M; it is later resized to M by chunk for gemm */
+
+		il = ivt + *m * *m;
+		if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) {
+		    ldwrkl = *m;
+		    chunk = *n;
+		} else {
+		    ldwrkl = *m;
+		    chunk = (*lwork - *m * *m) / *m;
+		}
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy L to WORK(IL), zeroing out above it */
+
+		dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		dlaset_("U", &i__1, &i__2, &c_b63, &c_b63, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*              Generate Q in A */
+/*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in WORK(IL) */
+/*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U, and computing right singular */
+/*              vectors of bidiagonal matrix in WORK(IVT) */
+/*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC */
+
+		dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], m, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of L and WORK(IVT) */
+/*              by right singular vectors of L */
+/*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &work[ivt], m, &work[nwork], &i__1, &ierr);
+
+/*              Multiply right singular vectors of L in WORK(IVT) by Q */
+/*              in A, storing result in WORK(IL) and copying to A */
+/*              Workspace: need   M*M [VT] + M*M [L] */
+/*              Workspace: prefer M*M [VT] + M*N [L] */
+/*              At this point, L is resized as M by chunk. */
+
+		i__1 = *n;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
+			i__2) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = f2cmin(i__3,chunk);
+		    dgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], m, &a[
+			    i__ * a_dim1 + 1], lda, &c_b63, &work[il], &
+			    ldwrkl);
+		    dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 
+			    + 1], lda);
+/* L30: */
+		}
+
+	    } else if (wntqs) {
+
+/*              Path 3t (N >> M, JOBZ='S') */
+/*              M right singular vectors to be computed in VT and */
+/*              M left singular vectors to be computed in U */
+
+		il = 1;
+
+/*              WORK(IL) is M by M */
+
+		ldwrkl = *m;
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              Workspace: need   M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [L] + M [tau] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy L to WORK(IL), zeroing out above it */
+
+		dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		dlaset_("U", &i__2, &i__1, &c_b63, &c_b63, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*              Generate Q in A */
+/*              Workspace: need   M*M [L] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [L] + M [tau] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in WORK(IL). */
+/*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + BDSPAC */
+
+		dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of L and VT */
+/*              by right singular vectors of L */
+/*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply right singular vectors of L in WORK(IL) by */
+/*              Q in A, storing result in VT */
+/*              Workspace: need   M*M [L] */
+
+		dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl);
+		dgemm_("N", "N", m, n, m, &c_b84, &work[il], &ldwrkl, &a[
+			a_offset], lda, &c_b63, &vt[vt_offset], ldvt);
+
+	    } else if (wntqa) {
+
+/*              Path 4t (N >> M, JOBZ='A') */
+/*              N right singular vectors to be computed in VT and */
+/*              M left singular vectors to be computed in U */
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+
+		ldwkvt = *m;
+		itau = ivt + ldwkvt * *m;
+		nwork = itau + *m;
+
+/*              Compute A=L*Q, copying result to VT */
+/*              Workspace: need   M*M [VT] + M [tau] + M    [work] */
+/*              Workspace: prefer M*M [VT] + M [tau] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*              Generate Q in VT */
+/*              Workspace: need   M*M [VT] + M [tau] + N    [work] */
+/*              Workspace: prefer M*M [VT] + M [tau] + N*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[
+			nwork], &i__2, &ierr);
+
+/*              Produce L in A, zeroing out other entries */
+
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		dlaset_("U", &i__2, &i__1, &c_b63, &c_b63, &a[(a_dim1 << 1) + 
+			1], lda);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*              Bidiagonalize L in A */
+/*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + M      [work] */
+/*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in WORK(IVT) */
+/*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + BDSPAC */
+
+		dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*              Overwrite U by left singular vectors of L and WORK(IVT) */
+/*              by right singular vectors of L */
+/*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup]+ M    [work] */
+/*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &
+			ierr);
+
+/*              Multiply right singular vectors of L in WORK(IVT) by */
+/*              Q in VT, storing result in A */
+/*              Workspace: need   M*M [VT] */
+
+		dgemm_("N", "N", m, n, m, &c_b84, &work[ivt], &ldwkvt, &vt[
+			vt_offset], ldvt, &c_b63, &a[a_offset], lda);
+
+/*              Copy right singular vectors of A from A to VT */
+
+		dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+	    }
+
+	} else {
+
+/*           N .LT. MNTHR */
+
+/*           Path 5t (N > M, but not much larger) */
+/*           Reduce to bidiagonal form without LQ decomposition */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*           Bidiagonalize A */
+/*           Workspace: need   3*M [e, tauq, taup] + N        [work] */
+/*           Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work] */
+
+	    i__2 = *lwork - nwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*              Path 5tn (N > M, JOBZ='N') */
+/*              Perform bidiagonal SVD, only computing singular values */
+/*              Workspace: need   3*M [e, tauq, taup] + BDSPAC */
+
+		dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+/*              Path 5to (N > M, JOBZ='O') */
+		ldwkvt = *m;
+		ivt = nwork;
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    dlaset_("F", m, n, &c_b63, &c_b63, &work[ivt], &ldwkvt);
+		    nwork = ivt + ldwkvt * *n;
+/*                 IL is unused; silence compile warnings */
+		    il = -1;
+		} else {
+
+/*                 WORK( IVT ) is M by M */
+
+		    nwork = ivt + ldwkvt * *m;
+		    il = nwork;
+
+/*                 WORK(IL) is M by CHUNK */
+
+		    chunk = (*lwork - *m * *m - *m * 3) / *m;
+		}
+
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in WORK(IVT) */
+/*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + BDSPAC */
+
+		dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*              Overwrite U by left singular vectors of A */
+/*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*                 Path 5to-fast */
+/*                 Overwrite WORK(IVT) by right singular vectors of A */
+/*                 Workspace: need   3*M [e, tauq, taup] + M*N [VT] + M    [work] */
+/*                 Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			    itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, 
+			    &ierr);
+
+/*                 Copy right singular vectors of A from WORK(IVT) to A */
+
+		    dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda);
+		} else {
+
+/*                 Path 5to-slow */
+/*                 Generate P**T in A */
+/*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work] */
+/*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */
+
+		    i__2 = *lwork - nwork + 1;
+		    dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[nwork], &i__2, &ierr);
+
+/*                 Multiply right singular vectors of bidiagonal matrix */
+/*                 in WORK(IVT) by P**T in A, storing result in */
+/*                 WORK(IL) and copying to A */
+/*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M*NB [L] */
+/*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N  [L] */
+
+		    i__2 = *n;
+		    i__1 = chunk;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *n - i__ + 1;
+			blk = f2cmin(i__3,chunk);
+			dgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], &
+				ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b63, &
+				work[il], m);
+			dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + 
+				1], lda);
+/* L40: */
+		    }
+		}
+	    } else if (wntqs) {
+
+/*              Path 5ts (N > M, JOBZ='S') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*M [e, tauq, taup] + BDSPAC */
+
+		dlaset_("F", m, n, &c_b63, &c_b63, &vt[vt_offset], ldvt);
+		dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*M [e, tauq, taup] + M    [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + M*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*              Path 5ta (N > M, JOBZ='A') */
+/*              Perform bidiagonal SVD, computing left singular vectors */
+/*              of bidiagonal matrix in U and computing right singular */
+/*              vectors of bidiagonal matrix in VT */
+/*              Workspace: need   3*M [e, tauq, taup] + BDSPAC */
+
+		dlaset_("F", n, n, &c_b63, &c_b63, &vt[vt_offset], ldvt);
+		dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], 
+			info);
+
+/*              Set the right corner of VT to identity matrix */
+
+		if (*n > *m) {
+		    i__1 = *n - *m;
+		    i__2 = *n - *m;
+		    dlaset_("F", &i__1, &i__2, &c_b63, &c_b84, &vt[*m + 1 + (*
+			    m + 1) * vt_dim1], ldvt);
+		}
+
+/*              Overwrite U by left singular vectors of A and VT */
+/*              by right singular vectors of A */
+/*              Workspace: need   3*M [e, tauq, taup] + N    [work] */
+/*              Workspace: prefer 3*M [e, tauq, taup] + N*NB [work] */
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (doublereal) maxwrk;
+
+    return 0;
+
+/*     End of DGESDD */
+
+} /* dgesdd_ */
+
diff --git a/lapack-netlib/SRC/dgesv.c b/lapack-netlib/SRC/dgesv.c
new file mode 100644
index 000000000..30fdb4e15
--- /dev/null
+++ b/lapack-netlib/SRC/dgesv.c
@@ -0,0 +1,574 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* integer type used by the translated routines below */
+typedef unsigned int uinteger;	/* unsigned counterpart, used by the bit_clear/bit_set macros */
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single precision complex: r = real part, i = imaginary part */
+typedef struct { doublereal r, i; } doublecomplex;	/* double precision complex: r = real part, i = imaginary part */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* pointer cast; assumes the {r,i} struct is laid out like _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* pointer cast; assumes the {r,i} struct is laid out like _Complex double */
+#define pCf(z) (*_pCf(z))	/* lvalue: the object z points to, accessed as _Complex float */
+#define pCd(z) (*_pCd(z))	/* lvalue: the object z points to, accessed as _Complex double */
+typedef int logical;	/* truth value; see TRUE_ and FALSE_ below */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* expands to a complete `break;` statement */
+#define mycycle() continue;	/* expands to a complete `continue;` statement */
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* The value-producing helpers are parenthesized rather than wrapped in braces, so they are valid in any expression and not only as a scalar initializer. */
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Return x**n by repeated squaring; n < 0 yields (1/x)**|n|.  |n| is taken */
+	/* in unsigned arithmetic so that n == INT_MIN does not overflow on negation. */
+	float result = 1.0f;
+	unsigned long u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u > 1) x *= x;	/* no squaring after the last remaining bit */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* Return x**n by repeated squaring; n < 0 yields (1/x)**|n|.  |n| is taken */
+	/* in unsigned arithmetic so that n == INT_MIN does not overflow on negation. */
+	double result = 1.0;
+	unsigned long u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u > 1) x *= x;	/* no squaring after the last remaining bit */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Return x**n by repeated squaring; n < 0 yields (1/x)**|n|.  |n| is taken */
+	/* in unsigned arithmetic so that n == INT_MIN does not overflow on negation. */
+	_Complex float result = 1.0f;
+	unsigned long u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u > 1) x *= x;	/* no squaring after the last remaining bit */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Return x**n by repeated squaring; n < 0 yields (1/x)**|n|.  |n| is taken */
+	/* in unsigned arithmetic so that n == INT_MIN does not overflow on negation. */
+	_Complex double result = 1.0;
+	unsigned long u = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u > 1) x *= x;	/* no squaring after the last remaining bit */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n.  For n <= 0: x**0 and 1**n give 1, (-1)**n is computed */
+	/* from |n|, 0**n (n < 0) still evaluates 1/x (a division by zero), and every */
+	/* other base gives 0.  |n| is formed in unsigned arithmetic (no INT_MIN overflow). */
+	integer acc = 1;
+	unsigned long bits = n < 0 ? 0UL - (unsigned long)n : (unsigned long)n;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		if (x != -1) return x == 0 ? 1/x : 0;
+	}
+	for (;;) {
+		if (bits & 1UL) acc *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return acc;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used */
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used */
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over k of conjg(x(k)) * y(k) for n strided elements.  As in the */
+	/* reference BLAS, a negative increment starts at offset (1-n)*inc and walks */
+	/* backwards, instead of indexing before the first element of the vector. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over k of conjg(x(k)) * y(k) for n strided elements.  As in the */
+	/* reference BLAS, a negative increment starts at offset (1-n)*inc and walks */
+	/* backwards, instead of indexing before the first element of the vector. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over k of x(k) * y(k) (no conjugation) for n strided elements.  As */
+	/* in the reference BLAS, a negative increment starts at offset (1-n)*inc and */
+	/* walks backwards, instead of indexing before the first element of the vector. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over k of x(k) * y(k) (no conjugation) for n strided elements.  As */
+	/* in the reference BLAS, a negative increment starts at offset (1-n)*inc and */
+	/* walks backwards, instead of indexing before the first element of the vector. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> DGESV computes the solution to a system of linear equations A * X = B for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESV + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f
+"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f
+"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f
+"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGESV computes the solution to a real system of linear equations */
+/* >    A * X = B, */
+/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */
+/* > */
+/* > The LU decomposition with partial pivoting and row interchanges is */
+/* > used to factor A as */
+/* >    A = P * L * U, */
+/* > where P is a permutation matrix, L is unit lower triangular, and U is */
+/* > upper triangular.  The factored form of A is then used to solve the */
+/* > system of equations A * X = B. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of linear equations, i.e., the order of the */
+/* >          matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the N-by-N coefficient matrix A. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices that define the permutation matrix P; */
+/* >          row i of the matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the N-by-NRHS right hand side matrix B. */
+/* >          On exit, if INFO = 0, the N-by-NRHS solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, so the solution could not be computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer 
+	*lda, integer *ipiv, doublereal *b, integer *ldb, integer *info)
+{
+    /* External routines called by this driver */
+    extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, 
+	    integer *, integer *, integer *);
+    extern /* Subroutine */ int dgetrs_(char *, integer *, integer *, 
+	    doublereal *, integer *, integer *, doublereal *, integer *, 
+	    integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+    /* 1-based position of the first illegal argument, 0 when all are legal */
+    integer bad_arg;
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+/*     A, IPIV and B are only ever handed on from their first element, so */
+/*     they are passed to DGETRF/DGETRS as received instead of going */
+/*     through 1-based pointer shifts. */
+
+/*     Test the input parameters in order; the first failure wins. */
+
+    *info = 0;
+    if (*n < 0) {
+	bad_arg = 1;
+    } else if (*nrhs < 0) {
+	bad_arg = 2;
+    } else if (*lda < 1 || *lda < *n) {
+	bad_arg = 4;
+    } else if (*ldb < 1 || *ldb < *n) {
+	bad_arg = 7;
+    } else {
+	bad_arg = 0;
+    }
+
+/*     Report an illegal argument through XERBLA with INFO = -position. */
+
+    if (bad_arg != 0) {
+	*info = -bad_arg;
+	xerbla_("DGESV ", &bad_arg, (ftnlen)6);
+	return 0;
+    }
+
+/*     Compute the LU factorization of A; skip the solve if INFO != 0. */
+
+    dgetrf_(n, n, a, lda, ipiv, info);
+    if (*info != 0) {
+	return 0;
+    }
+
+/*     Solve the system A*X = B, overwriting B with X. */
+
+    dgetrs_("No transpose", n, nrhs, a, lda, ipiv, b, ldb, info);
+    return 0;
+
+/*     End of DGESV */
+
+} /* dgesv_ */
+
diff --git a/lapack-netlib/SRC/dgesvd.c b/lapack-netlib/SRC/dgesvd.c
new file mode 100644
index 000000000..597ad6dda
--- /dev/null
+++ b/lapack-netlib/SRC/dgesvd.c
@@ -0,0 +1,4475 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* integer type used for every INTEGER argument of the translated routines */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* single-precision complex: real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex: real part r, imaginary part i */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* same address, viewed as a C99 complex pointer */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* same address, viewed as a C99 complex pointer */
+#define pCf(z) (*_pCf(z))	/* lvalue for *z; the complex macros in this header assign through it */
+#define pCd(z) (*_pCd(z))	/* lvalue for *z; the complex macros in this header assign through it */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)	/* values for the logical types above */
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain extern unless already defined */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: of the types below, only ftnlen is used by the code visible in this chunk */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; most string fields are paired with a length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/* parameters in standard's order */
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* declared ftnint, unlike the other *len fields, which are ftnlen */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type declared above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus a table of Vardesc pointers; nvars is presumably its length */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* takes the place of <stdlib.h> abs(); x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* one argument is evaluated twice: avoid side effects */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* one argument is evaluated twice: avoid side effects */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* calls double-precision ccos(); the result is narrowed on the store */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))	/* the d_* macros below take pointers, matching Fortran by-reference arguments */
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* calls double-precision conj(); the result is narrowed on the store */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )	/* positive difference: a - b when a > b, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))	/* calls double-precision cimag() on a single-precision value */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncate toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* base-10 logarithm: log(x) scaled by log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )	/* base-10 logarithm: log(x) scaled by log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* nearest integer, halves rounded away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| carrying the sign of b (b == 0 counts as positive) */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))	/* expands through the abs() macro defined above */
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)	/* the length is simply the value passed as n */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))	/* double ** double through libm pow() */
+#define pow_si(B,E) spow_ui(*(B),*(E))	/* real ** integer */
+#define pow_ri(B,E) spow_ui(*(B),*(E))	/* real ** integer (same expansion as pow_si) */
+#define pow_di(B,E) dpow_ui(*(B),*(E))	/* double ** integer */
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}	/* double complex ** integer, stored through p */
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}	/* single complex ** integer, stored through p */
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));}	/* double complex ** double complex; B is converted with Cd() like A because cpow() takes C99 complex values, not the doublecomplex struct */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }	/* joins *np strings rpp[i] of length rnp[i] into lpp, truncated to llp characters and blank-padded */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))	/* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) characters, stops at a NUL in B, adds no padding or terminator */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";	/* version tag string; not referenced by the code visible in this chunk */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;	/* expands to a statement, semicolon included */
+#define mycycle() continue;	/* expands to a statement, semicolon included */
+#define myceiling(w) {ceil(w)}	/* NOTE(review): the braces make this usable only as a brace-enclosed initializer - confirm intent */
+#define myhuge(w) {HUGE_VAL}	/* w is ignored; same brace caveat as myceiling */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* always expands to dmaxloc_, whatever the element type of w; same brace caveat as myceiling */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* C++ needs the explicit ellipsis for an unspecified argument list */
+#else
+typedef logical (*L_fp)();	/* pointer to a logical-valued function with unspecified arguments */
+#endif
+
+/* x**n by binary exponentiation: n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0;
+	/* |n| is formed in unsigned arithmetic; the former n = -n overflowed for the most negative n. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {	/* walk the exponent bits, least significant first */
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* x**n by binary exponentiation: n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic; the former n = -n overflowed for the most negative n. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {	/* walk the exponent bits, least significant first */
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Complex x**n by binary exponentiation: n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic; the former n = -n overflowed for the most negative n. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {	/* walk the exponent bits, least significant first */
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Complex x**n by binary exponentiation: n < 0 gives (1/x)**|n|, n == 0 gives 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic; the former n = -n overflowed for the most negative n. */
+	unsigned long int u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {	/* walk the exponent bits, least significant first */
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Integer power x**n.  For n < 0 the result is 1 for x == 1, +1 or -1 for x == -1, and 0 for |x| >= 2. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1;
+	unsigned long int u;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* x == 0 still evaluates 1/x, exactly as the original expression did. */
+		return x == 0 ? 1/x : 0;
+	}
+	/* Only x == -1 arrives here with n < 0; |n| is taken unsigned because n = -n overflowed for the most negative n. */
+	u = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	for (; u != 0; u >>= 1) {	/* binary exponentiation, least significant bit first */
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Offset (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k = s;
+	while (++k <= e)	/* strict '>' keeps the earliest of equal maxima */
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Offset (1-based, counted from s) of the first largest entry among w[s-1] .. w[e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k = s;
+	while (++k <= e)	/* strict '>' keeps the earliest of equal maxima */
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+/* Conjugated dot product, *z = sum of conjg(x(i)) * y(i) for i = 1..n; the result is returned through z. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment starts at (1-n)*inc, as reference BLAS does, instead of indexing in front of the array. */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product, *z = sum of conjg(x(i)) * y(i) for i = 1..n; the result is returned through z. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment starts at (1-n)*inc, as reference BLAS does, instead of indexing in front of the array. */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product, *z = sum of x(i) * y(i) for i = 1..n; the result is returned through z. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment starts at (1-n)*inc, as reference BLAS does, instead of indexing in front of the array. */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product, *z = sum of x(i) * y(i) for i = 1..n; the result is returned through z. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* A negative increment starts at (1-n)*inc, as reference BLAS does, instead of indexing in front of the array. */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__6 = 6;	/* first argument of the ILAENV call that sets mnthr */
+static integer c__0 = 0;	/* passed for the last two integer arguments of that ILAENV call */
+static integer c__2 = 2;	/* number of strings s_cat joins (JOBU and JOBVT) */
+static integer c_n1 = -1;	/* LWORK = -1, used for the workspace-size queries */
+static doublereal c_b57 = 0.;	/* not referenced in the visible part of dgesvd_ */
+static integer c__1 = 1;	/* not referenced in the visible part of dgesvd_ */
+static doublereal c_b79 = 1.;	/* not referenced in the visible part of dgesvd_ */
+
+/* > \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESVD + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */
+/*                          WORK, LWORK, INFO ) */
+
+/*       CHARACTER          JOBU, JOBVT */
+/*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N */
+/*       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ), */
+/*      $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGESVD computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, optionally computing the left and/or right singular */
+/* > vectors. The SVD is written */
+/* > */
+/* >      A = U * SIGMA * transpose(V) */
+/* > */
+/* > where SIGMA is an M-by-N matrix which is zero except for its */
+/* > min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
+/* > V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA */
+/* > are the singular values of A; they are real and non-negative, and */
+/* > are returned in descending order.  The first min(m,n) columns of */
+/* > U and V are the left and right singular vectors of A. */
+/* > */
+/* > Note that the routine returns V**T, not V. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix U: */
+/* >          = 'A':  all M columns of U are returned in array U; */
+/* >          = 'S':  the first min(m,n) columns of U (the left singular */
+/* >                  vectors) are returned in the array U; */
+/* >          = 'O':  the first min(m,n) columns of U (the left singular */
+/* >                  vectors) are overwritten on the array A; */
+/* >          = 'N':  no columns of U (no left singular vectors) are */
+/* >                  computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVT */
+/* > \verbatim */
+/* >          JOBVT is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix */
+/* >          V**T: */
+/* >          = 'A':  all N rows of V**T are returned in the array VT; */
+/* >          = 'S':  the first min(m,n) rows of V**T (the right singular */
+/* >                  vectors) are returned in the array VT; */
+/* >          = 'O':  the first min(m,n) rows of V**T (the right singular */
+/* >                  vectors) are overwritten on the array A; */
+/* >          = 'N':  no rows of V**T (no right singular vectors) are */
+/* >                  computed. */
+/* > */
+/* >          JOBVT and JOBU cannot both be 'O'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          if JOBU = 'O',  A is overwritten with the first min(m,n) */
+/* >                          columns of U (the left singular vectors, */
+/* >                          stored columnwise); */
+/* >          if JOBVT = 'O', A is overwritten with the first min(m,n) */
+/* >                          rows of V**T (the right singular vectors, */
+/* >                          stored rowwise); */
+/* >          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */
+/* >                          are destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The singular values of A, sorted so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is DOUBLE PRECISION array, dimension (LDU,UCOL) */
+/* >          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. */
+/* >          If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */
+/* >          if JOBU = 'S', U contains the first min(m,n) columns of U */
+/* >          (the left singular vectors, stored columnwise); */
+/* >          if JOBU = 'N' or 'O', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= 1; if */
+/* >          JOBU = 'S' or 'A', LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VT */
+/* > \verbatim */
+/* >          VT is DOUBLE PRECISION array, dimension (LDVT,N) */
+/* >          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */
+/* >          V**T; */
+/* >          if JOBVT = 'S', VT contains the first min(m,n) rows of */
+/* >          V**T (the right singular vectors, stored rowwise); */
+/* >          if JOBVT = 'N' or 'O', VT is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT.  LDVT >= 1; if */
+/* >          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */
+/* >          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */
+/* >          superdiagonal elements of an upper bidiagonal matrix B */
+/* >          whose diagonal is in S (not necessarily sorted). B */
+/* >          satisfies A = U * B * VT, so it has the same singular values */
+/* >          as A, and singular vectors related by U and VT. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): */
+/* >             - PATH 1  (M much larger than N, JOBU='N') */
+/* >             - PATH 1t (N much larger than M, JOBVT='N') */
+/* >          LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if DBDSQR did not converge, INFO specifies how many */
+/* >                superdiagonals of an intermediate bidiagonal form B */
+/* >                did not converge to zero. See the description of WORK */
+/* >                above for details. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup doubleGEsing */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, 
+	doublereal *a, integer *lda, doublereal *s, doublereal *u, integer *
+	ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, 
+	integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 
+	    i__2, i__3, i__4;
+    char ch__1[2];
+
+    /* Local variables */
+    integer iscl;
+    doublereal anrm;
+    integer ierr, itau, ncvt, nrvt, lwork_dgebrd__, lwork_dgelqf__, 
+	    lwork_dgeqrf__, i__;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    integer chunk, minmn, wrkbl, itaup, itauq, mnthr, iwork;
+    logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs;
+    integer ie;
+    extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    integer ir, bdspac, iu;
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    integer *, integer *, doublereal *, integer *, integer *),
+	     dgeqrf_(integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *), dlacpy_(char *,
+	     integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), dlaset_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *), 
+	    dbdsqr_(char *, integer *, integer *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     integer *, doublereal *, integer *, doublereal *, integer *), dorgbr_(char *, integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *);
+    doublereal bignum;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *), dorgqr_(integer *, integer *, integer *, doublereal *,
+	     integer *, doublereal *, doublereal *, integer *, integer *);
+    integer ldwrkr, minwrk, ldwrku, maxwrk;
+    doublereal smlnum;
+    logical lquery, wntuas, wntvas;
+    integer lwork_dorgbr_p__, lwork_dorgbr_q__, lwork_dorglq_m__, 
+	    lwork_dorglq_n__, lwork_dorgqr_m__, lwork_dorgqr_n__, blk, ncu;
+    doublereal dum[1], eps;
+    integer nru;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    wntua = lsame_(jobu, "A");
+    wntus = lsame_(jobu, "S");
+    wntuas = wntua || wntus;
+    wntuo = lsame_(jobu, "O");
+    wntun = lsame_(jobu, "N");
+    wntva = lsame_(jobvt, "A");
+    wntvs = lsame_(jobvt, "S");
+    wntvas = wntva || wntvs;
+    wntvo = lsame_(jobvt, "O");
+    wntvn = lsame_(jobvt, "N");
+    lquery = *lwork == -1;
+
+    if (! (wntua || wntus || wntuo || wntun)) {
+	*info = -1;
+    } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -6;
+    } else if (*ldu < 1 || wntuas && *ldu < *m) {
+	*info = -9;
+    } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) {
+	*info = -11;
+    }
+
+/*     Compute workspace */
+/*      (Note: Comments in the code beginning "Workspace:" describe the */
+/*       minimal amount of workspace needed at that point in the code, */
+/*       as well as the preferred amount for good performance. */
+/*       NB refers to the optimal block size for the immediately */
+/*       following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	if (*m >= *n && minmn > 0) {
+
+/*           Compute space needed for DBDSQR */
+
+/* Writing concatenation */
+	    i__1[0] = 1, a__1[0] = jobu;
+	    i__1[1] = 1, a__1[1] = jobvt;
+	    s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+	    mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, (
+		    ftnlen)6, (ftnlen)2);
+	    bdspac = *n * 5;
+/*           Compute space needed for DGEQRF */
+	    dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_dgeqrf__ = (integer) dum[0];
+/*           Compute space needed for DORGQR */
+	    dorgqr_(m, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_dorgqr_n__ = (integer) dum[0];
+	    dorgqr_(m, m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_dorgqr_m__ = (integer) dum[0];
+/*           Compute space needed for DGEBRD */
+	    dgebrd_(n, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
+		     &ierr);
+	    lwork_dgebrd__ = (integer) dum[0];
+/*           Compute space needed for DORGBR P */
+	    dorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_dorgbr_p__ = (integer) dum[0];
+/*           Compute space needed for DORGBR Q */
+	    dorgbr_("Q", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_dorgbr_q__ = (integer) dum[0];
+
+	    if (*m >= mnthr) {
+		if (wntun) {
+
+/*                 Path 1 (M much larger than N, JOBU='N') */
+
+		    maxwrk = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_dgebrd__;
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wntvo || wntvas) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_p__;
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntuo && wntvn) {
+
+/*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntuo && wntvas) {
+
+/*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */
+/*                 'A') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntus && wntvn) {
+
+/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntus && wntvo) {
+
+/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*n << 1) * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntus && wntvas) {
+
+/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */
+/*                 'A') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntua && wntvn) {
+
+/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntua && wntvo) {
+
+/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*n << 1) * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntua && wntvas) {
+
+/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */
+/*                 'A') */
+
+		    wrkbl = *n + lwork_dgeqrf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *n * *n + wrkbl;
+/* Computing MAX */
+		    i__2 = *n * 3 + *m;
+		    minwrk = f2cmax(i__2,bdspac);
+		}
+	    } else {
+
+/*              Path 10 (M at least N, but not much larger) */
+
+		dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
+			c_n1, &ierr);
+		lwork_dgebrd__ = (integer) dum[0];
+		maxwrk = *n * 3 + lwork_dgebrd__;
+		if (wntus || wntuo) {
+		    dorgbr_("Q", m, n, n, &a[a_offset], lda, dum, dum, &c_n1, 
+			    &ierr);
+		    lwork_dorgbr_q__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (wntua) {
+		    dorgbr_("Q", m, m, n, &a[a_offset], lda, dum, dum, &c_n1, 
+			    &ierr);
+		    lwork_dorgbr_q__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_q__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (! wntvn) {
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_p__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		i__2 = *n * 3 + *m;
+		minwrk = f2cmax(i__2,bdspac);
+	    }
+	} else if (minmn > 0) {
+
+/*           Compute space needed for DBDSQR */
+
+/* Writing concatenation */
+	    i__1[0] = 1, a__1[0] = jobu;
+	    i__1[1] = 1, a__1[1] = jobvt;
+	    s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+	    mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, (
+		    ftnlen)6, (ftnlen)2);
+	    bdspac = *m * 5;
+/*           Compute space needed for DGELQF */
+	    dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_dgelqf__ = (integer) dum[0];
+/*           Compute space needed for DORGLQ */
+	    dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr);
+	    lwork_dorglq_n__ = (integer) dum[0];
+	    dorglq_(m, n, m, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
+	    lwork_dorglq_m__ = (integer) dum[0];
+/*           Compute space needed for DGEBRD */
+	    dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
+		     &ierr);
+	    lwork_dgebrd__ = (integer) dum[0];
+/*            Compute space needed for DORGBR P */
+	    dorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
+	    lwork_dorgbr_p__ = (integer) dum[0];
+/*           Compute space needed for DORGBR Q */
+	    dorgbr_("Q", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
+	    lwork_dorgbr_q__ = (integer) dum[0];
+	    if (*n >= mnthr) {
+		if (wntvn) {
+
+/*                 Path 1t(N much larger than M, JOBVT='N') */
+
+		    maxwrk = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_dgebrd__;
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wntuo || wntuas) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_q__;
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvo && wntun) {
+
+/*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvo && wntuas) {
+
+/*                 Path 3t(N much larger than M, JOBU='S' or 'A', */
+/*                 JOBVT='O') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+/* Computing MAX */
+		    i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
+		    maxwrk = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvs && wntun) {
+
+/*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvs && wntuo) {
+
+/*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*m << 1) * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntvs && wntuas) {
+
+/*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
+/*                 JOBVT='S') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntva && wntun) {
+
+/*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntva && wntuo) {
+
+/*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = (*m << 1) * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		} else if (wntva && wntuas) {
+
+/*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
+/*                 JOBVT='A') */
+
+		    wrkbl = *m + lwork_dgelqf__;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    wrkbl = f2cmax(i__2,i__3);
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__;
+		    wrkbl = f2cmax(i__2,i__3);
+		    wrkbl = f2cmax(wrkbl,bdspac);
+		    maxwrk = *m * *m + wrkbl;
+/* Computing MAX */
+		    i__2 = *m * 3 + *n;
+		    minwrk = f2cmax(i__2,bdspac);
+		}
+	    } else {
+
+/*              Path 10t(N greater than M, but not much larger) */
+
+		dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
+			c_n1, &ierr);
+		lwork_dgebrd__ = (integer) dum[0];
+		maxwrk = *m * 3 + lwork_dgebrd__;
+		if (wntvs || wntvo) {
+/*                Compute space needed for DORGBR P */
+		    dorgbr_("P", m, n, m, &a[a_offset], n, dum, dum, &c_n1, &
+			    ierr);
+		    lwork_dorgbr_p__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (wntva) {
+		    dorgbr_("P", n, n, m, &a[a_offset], n, dum, dum, &c_n1, &
+			    ierr);
+		    lwork_dorgbr_p__ = (integer) dum[0];
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_p__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		if (! wntun) {
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_q__;
+		    maxwrk = f2cmax(i__2,i__3);
+		}
+		maxwrk = f2cmax(maxwrk,bdspac);
+/* Computing MAX */
+		i__2 = *m * 3 + *n;
+		minwrk = f2cmax(i__2,bdspac);
+	    }
+	}
+	maxwrk = f2cmax(maxwrk,minwrk);
+	work[1] = (doublereal) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("DGESVD", &i__2, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("P");
+    smlnum = sqrt(dlamch_("S")) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*        A has at least as many rows as columns. If A has sufficiently */
+/*        more rows than columns, first reduce using the QR */
+/*        decomposition (if sufficient workspace available) */
+
+	if (*m >= mnthr) {
+
+	    if (wntun) {
+
+/*              Path 1 (M much larger than N, JOBU='N') */
+/*              No left singular vectors to be computed */
+
+		itau = 1;
+		iwork = itau + *n;
+
+/*              Compute A=Q*R */
+/*              (Workspace: need 2*N, prefer N + N*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
+			i__2, &ierr);
+
+/*              Zero out below R */
+
+		if (*n > 1) {
+		    i__2 = *n - 1;
+		    i__3 = *n - 1;
+		    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[a_dim1 + 2],
+			     lda);
+		}
+		ie = 1;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		iwork = itaup + *n;
+
+/*              Bidiagonalize R in A */
+/*              (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+		ncvt = 0;
+		if (wntvo || wntvas) {
+
+/*                 If right singular vectors desired, generate P'. */
+/*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &
+			    work[iwork], &i__2, &ierr);
+		    ncvt = *n;
+		}
+		iwork = ie + *n;
+
+/*              Perform bidiagonal QR iteration, computing right */
+/*              singular vectors of A in A if desired */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[
+			a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], 
+			info);
+
+/*              If right singular vectors desired in VT, copy them there */
+
+		if (wntvas) {
+		    dlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], 
+			    ldvt);
+		}
+
+	    } else if (wntuo && wntvn) {
+
+/*              Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
+/*              N left singular vectors to be overwritten on A and */
+/*              no right singular vectors to be computed */
+
+/* Computing MAX */
+		i__2 = *n << 2;
+		if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *lda * *n + *n;
+		    if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) {
+
+/*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N */
+
+			ldwrku = *lda;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__2 = wrkbl, i__3 = *lda * *n + *n;
+			if (*lwork >= f2cmax(i__2,i__3) + *n * *n) {
+
+/*                    WORK(IU) is LDA by N, WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ldwrkr = *n;
+			} else {
+
+/*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N */
+
+			    ldwrku = (*lwork - *n * *n - *n) / *n;
+			    ldwrkr = *n;
+			}
+		    }
+		    itau = ir + ldwrkr * *n;
+		    iwork = itau + *n;
+
+/*                 Compute A=Q*R */
+/*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy R to WORK(IR) and zero out below it */
+
+		    dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		    i__2 = *n - 1;
+		    i__3 = *n - 1;
+		    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 1], 
+			    &ldwrkr);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize R in WORK(IR) */
+/*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+
+/*                 Generate left vectors bidiagonalizing R */
+/*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
+			    work[iwork], &i__2, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of R in WORK(IR) */
+/*                 (Workspace: need N*N + BDSPAC) */
+
+		    dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &
+			    c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork]
+			    , info);
+		    iu = ie + *n;
+
+/*                 Multiply Q in A by left singular vectors of R in */
+/*                 WORK(IR), storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N) */
+
+		    i__2 = *m;
+		    i__3 = ldwrku;
+		    for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__3) {
+/* Computing MIN */
+			i__4 = *m - i__ + 1;
+			chunk = f2cmin(i__4,ldwrku);
+			dgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + 
+				a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
+				work[iu], &ldwrku);
+			dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
+				a_dim1], lda);
+/* L10: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    ie = 1;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize A */
+/*                 (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__3, &ierr);
+
+/*                 Generate left vectors bidiagonalizing A */
+/*                 (Workspace: need 4*N, prefer 3*N + N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[iwork], &i__3, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of A in A */
+/*                 (Workspace: need BDSPAC) */
+
+		    dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &
+			    c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
+			     info);
+
+		}
+
+	    } else if (wntuo && wntvas) {
+
+/*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */
+/*              N left singular vectors to be overwritten on A and */
+/*              N right singular vectors to be computed in VT */
+
+/* Computing MAX */
+		i__3 = *n << 2;
+		if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__3 = wrkbl, i__2 = *lda * *n + *n;
+		    if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N */
+
+			ldwrku = *lda;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__3 = wrkbl, i__2 = *lda * *n + *n;
+			if (*lwork >= f2cmax(i__3,i__2) + *n * *n) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ldwrkr = *n;
+			} else {
+
+/*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N */
+
+			    ldwrku = (*lwork - *n * *n - *n) / *n;
+			    ldwrkr = *n;
+			}
+		    }
+		    itau = ir + ldwrkr * *n;
+		    iwork = itau + *n;
+
+/*                 Compute A=Q*R */
+/*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__3, &ierr);
+
+/*                 Copy R to VT, zeroing out below it */
+
+		    dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+			    ldvt);
+		    if (*n > 1) {
+			i__3 = *n - 1;
+			i__2 = *n - 1;
+			dlaset_("L", &i__3, &i__2, &c_b57, &c_b57, &vt[
+				vt_dim1 + 2], ldvt);
+		    }
+
+/*                 Generate Q in A */
+/*                 (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__3, &ierr);
+		    ie = itau;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize R in VT, copying result to WORK(IR) */
+/*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
+			    work[itauq], &work[itaup], &work[iwork], &i__3, &
+			    ierr);
+		    dlacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], &
+			    ldwrkr);
+
+/*                 Generate left vectors bidiagonalizing R in WORK(IR) */
+/*                 (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
+			    work[iwork], &i__3, &ierr);
+
+/*                 Generate right vectors bidiagonalizing R in VT */
+/*                 (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
+			    &work[iwork], &i__3, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of R in WORK(IR) and computing right */
+/*                 singular vectors of R in VT */
+/*                 (Workspace: need N*N + BDSPAC) */
+
+		    dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
+			    vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, 
+			    &work[iwork], info);
+		    iu = ie + *n;
+
+/*                 Multiply Q in A by left singular vectors of R in */
+/*                 WORK(IR), storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need N*N + 2*N, prefer N*N + M*N + N) */
+
+		    i__3 = *m;
+		    i__2 = ldwrku;
+		    for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
+			     i__2) {
+/* Computing MIN */
+			i__4 = *m - i__ + 1;
+			chunk = f2cmin(i__4,ldwrku);
+			dgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + 
+				a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
+				work[iu], &ldwrku);
+			dlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
+				a_dim1], lda);
+/* L20: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    itau = 1;
+		    iwork = itau + *n;
+
+/*                 Compute A=Q*R */
+/*                 (Workspace: need 2*N, prefer N + N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy R to VT, zeroing out below it */
+
+		    dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+			    ldvt);
+		    if (*n > 1) {
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
+				vt_dim1 + 2], ldvt);
+		    }
+
+/*                 Generate Q in A */
+/*                 (Workspace: need 2*N, prefer N + N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *n;
+		    itaup = itauq + *n;
+		    iwork = itaup + *n;
+
+/*                 Bidiagonalize R in VT */
+/*                 (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
+			    work[itauq], &work[itaup], &work[iwork], &i__2, &
+			    ierr);
+
+/*                 Multiply Q in A by left vectors bidiagonalizing R */
+/*                 (Workspace: need 3*N + M, prefer 3*N + M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &
+			    work[itauq], &a[a_offset], lda, &work[iwork], &
+			    i__2, &ierr);
+
+/*                 Generate right vectors bidiagonalizing R in VT */
+/*                 (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
+			    &work[iwork], &i__2, &ierr);
+		    iwork = ie + *n;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of A in A and computing right */
+/*                 singular vectors of A in VT */
+/*                 (Workspace: need BDSPAC) */
+
+		    dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
+			    vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
+			    work[iwork], info);
+
+		}
+
+	    } else if (wntus) {
+
+		if (wntvn) {
+
+/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
+/*                 N left singular vectors to be computed in U and */
+/*                 no right singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IR) is LDA by N */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is N by N */
+
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R */
+/*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IR), zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				1], &ldwrkr);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IR) */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate left vectors bidiagonalizing R in WORK(IR) */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IR) */
+/*                    (Workspace: need N*N + BDSPAC) */
+
+			dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply Q in A by left singular vectors of R in */
+/*                    WORK(IR), storing result in U */
+/*                    (Workspace: need N*N) */
+
+			dgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
+				work[ir], &ldwrkr, &c_b57, &u[u_offset], ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left vectors bidiagonalizing R */
+/*                    (Workspace: need 3*N + M, prefer 3*N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntvo) {
+
+/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
+/*                 N left singular vectors to be computed in U and */
+/*                 N right singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *n) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			} else {
+
+/*                       WORK(IU) is N by N and WORK(IR) is N by N */
+
+			    ldwrku = *n;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R */
+/*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*N*N + 4*N, */
+/*                                prefer 2*N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*N*N + 4*N-1, */
+/*                                prefer 2*N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in WORK(IR) */
+/*                    (Workspace: need 2*N*N + BDSPAC) */
+
+			dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
+				ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply Q in A by left singular vectors of R in */
+/*                    WORK(IU), storing result in U */
+/*                    (Workspace: need N*N) */
+
+			dgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
+				work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
+
+/*                    Copy right singular vectors of R to A */
+/*                    (Workspace: need N*N) */
+
+			dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left vectors bidiagonalizing R */
+/*                    (Workspace: need 3*N + M, prefer 3*N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+
+/*                    Generate right vectors bidiagonalizing R in A */
+/*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in A */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
+				a_offset], lda, &u[u_offset], ldu, dum, &c__1,
+				 &work[iwork], info);
+
+		    }
+
+		} else if (wntvas) {
+
+/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' */
+/*                         or 'A') */
+/*                 N left singular vectors to be computed in U and */
+/*                 N right singular vectors to be computed in VT */
+
+/* Computing MAX */
+		    i__2 = *n << 2;
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IU) is LDA by N */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is N by N */
+
+			    ldwrku = *n;
+			}
+			itau = iu + ldwrku * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R */
+/*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to VT */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
+				 ldvt);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need N*N + 4*N-1, */
+/*                                prefer N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in VT */
+/*                    (Workspace: need N*N + BDSPAC) */
+
+			dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &work[iu], &ldwrku, dum, &
+				c__1, &work[iwork], info);
+
+/*                    Multiply Q in A by left singular vectors of R in */
+/*                    WORK(IU), storing result in U */
+/*                    (Workspace: need N*N) */
+
+			dgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
+				work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R to VT, zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
+				    vt_dim1 + 2], ldvt);
+			}
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in VT */
+/*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
+				&work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in VT */
+/*                    (Workspace: need 3*N + M, prefer 3*N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
+				&work[itauq], &u[u_offset], ldu, &work[iwork],
+				 &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    } else if (wntua) {
+
+		if (wntvn) {
+
+/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
+/*                 M left singular vectors to be computed in U and */
+/*                 no right singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IR) is LDA by N */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is N by N */
+
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Copy R to WORK(IR), zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				1], &ldwrkr);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IR) */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IR) */
+/*                    (Workspace: need N*N + BDSPAC) */
+
+			dbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply Q in U by left singular vectors of R in */
+/*                    WORK(IR), storing result in A */
+/*                    (Workspace: need N*N) */
+
+			dgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
+				work[ir], &ldwrkr, &c_b57, &a[a_offset], lda);
+
+/*                    Copy left singular vectors of A from A to U */
+
+			dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N + M, prefer N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in A */
+/*                    (Workspace: need 3*N + M, prefer 3*N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
+				dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntvo) {
+
+/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
+/*                 M left singular vectors to be computed in U and */
+/*                 N right singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *n) * *n) {
+
+/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			} else {
+
+/*                       WORK(IU) is N by N and WORK(IR) is N by N */
+
+			    ldwrku = *n;
+			    ir = iu + ldwrku * *n;
+			    ldwrkr = *n;
+			}
+			itau = ir + ldwrkr * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*N*N + 4*N, */
+/*                                prefer 2*N*N+3*N+2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*N*N + 4*N-1, */
+/*                                prefer 2*N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in WORK(IR) */
+/*                    (Workspace: need 2*N*N + BDSPAC) */
+
+			dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
+				ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply Q in U by left singular vectors of R in */
+/*                    WORK(IU), storing result in A */
+/*                    (Workspace: need N*N) */
+
+			dgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
+				work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
+
+/*                    Copy left singular vectors of A from A to U */
+
+			dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Copy right singular vectors of R from WORK(IR) to A */
+
+			dlacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N + M, prefer N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Zero out below R in A */
+
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
+				    a_dim1 + 2], lda);
+			}
+
+/*                    Bidiagonalize R in A */
+/*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in A */
+/*                    (Workspace: need 3*N + M, prefer 3*N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
+				work[itauq], &u[u_offset], ldu, &work[iwork], 
+				&i__2, &ierr)
+				;
+
+/*                    Generate right bidiagonalizing vectors in A */
+/*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in A */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
+				a_offset], lda, &u[u_offset], ldu, dum, &c__1,
+				 &work[iwork], info);
+
+		    }
+
+		} else if (wntvas) {
+
+/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' */
+/*                         or 'A') */
+/*                 M left singular vectors to be computed in U and */
+/*                 N right singular vectors to be computed in VT */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *n) {
+
+/*                       WORK(IU) is LDA by N */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is N by N */
+
+			    ldwrku = *n;
+			}
+			itau = iu + ldwrku * *n;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N*N + N + M, prefer N*N + N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R to WORK(IU), zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *n - 1;
+			i__3 = *n - 1;
+			dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				1], &ldwrku);
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in WORK(IU), copying result to VT */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
+				 ldvt);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need N*N + 4*N-1, */
+/*                                prefer N*N+3*N+(N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of R in WORK(IU) and computing */
+/*                    right singular vectors of R in VT */
+/*                    (Workspace: need N*N + BDSPAC) */
+
+			dbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &work[iu], &ldwrku, dum, &
+				c__1, &work[iwork], info);
+
+/*                    Multiply Q in U by left singular vectors of R in */
+/*                    WORK(IU), storing result in A */
+/*                    (Workspace: need N*N) */
+
+			dgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
+				work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
+
+/*                    Copy left singular vectors of A from A to U */
+
+			dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *n;
+
+/*                    Compute A=Q*R, copying result to U */
+/*                    (Workspace: need 2*N, prefer N + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+
+/*                    Generate Q in U */
+/*                    (Workspace: need N + M, prefer N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy R from A to VT, zeroing out below it */
+
+			dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+			if (*n > 1) {
+			    i__2 = *n - 1;
+			    i__3 = *n - 1;
+			    dlaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
+				    vt_dim1 + 2], ldvt);
+			}
+			ie = itau;
+			itauq = ie + *n;
+			itaup = itauq + *n;
+			iwork = itaup + *n;
+
+/*                    Bidiagonalize R in VT */
+/*                    (Workspace: need 4*N, prefer 3*N + 2*N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
+				&work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply Q in U by left bidiagonalizing vectors */
+/*                    in VT */
+/*                    (Workspace: need 3*N + M, prefer 3*N + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
+				&work[itauq], &u[u_offset], ldu, &work[iwork],
+				 &i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in VT */
+/*                    (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
+				itaup], &work[iwork], &i__2, &ierr)
+				;
+			iwork = ie + *n;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    }
+
+	} else {
+
+/*           M .LT. MNTHR */
+
+/*           Path 10 (M at least N, but not much larger) */
+/*           Reduce to bidiagonal form without QR decomposition */
+
+	    ie = 1;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    iwork = itaup + *n;
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[iwork], &i__2, &ierr);
+	    if (wntuas) {
+
+/*              If left singular vectors desired in U, copy result to U */
+/*              and generate left bidiagonalizing vectors in U */
+/*              (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB) */
+
+		dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+		if (wntus) {
+		    ncu = *n;
+		}
+		if (wntua) {
+		    ncu = *m;
+		}
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], &
+			work[iwork], &i__2, &ierr);
+	    }
+	    if (wntvas) {
+
+/*              If right singular vectors desired in VT, copy result to */
+/*              VT and generate right bidiagonalizing vectors in VT */
+/*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+		dlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[iwork], &i__2, &ierr);
+	    }
+	    if (wntuo) {
+
+/*              If left singular vectors desired in A, generate left */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*N, prefer 3*N + N*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    if (wntvo) {
+
+/*              If right singular vectors desired in A, generate right */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    iwork = ie + *n;
+	    if (wntuas || wntuo) {
+		nru = *m;
+	    }
+	    if (wntun) {
+		nru = 0;
+	    }
+	    if (wntvas || wntvo) {
+		ncvt = *n;
+	    }
+	    if (wntvn) {
+		ncvt = 0;
+	    }
+	    if (! wntuo && ! wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
+			work[iwork], info);
+	    } else if (! wntuo && wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in A */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
+			a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
+			iwork], info);
+	    } else {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in A and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
+			work[iwork], info);
+	    }
+
+	}
+
+    } else {
+
+/*        A has more columns than rows. If A has sufficiently more */
+/*        columns than rows, first reduce using the LQ decomposition (if */
+/*        sufficient workspace available) */
+
+	if (*n >= mnthr) {
+
+	    if (wntvn) {
+
+/*              Path 1t (N much larger than M, JOBVT='N') */
+/*              No right singular vectors to be computed */
+
+		itau = 1;
+		iwork = itau + *m;
+
+/*              Compute A=L*Q */
+/*              (Workspace: need 2*M, prefer M + M*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
+			i__2, &ierr);
+
+/*              Zero out above L */
+
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 << 1) + 
+			1], lda);
+		ie = 1;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		iwork = itaup + *m;
+
+/*              Bidiagonalize L in A */
+/*              (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+		if (wntuo || wntuas) {
+
+/*                 If left singular vectors desired, generate Q */
+/*                 (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &
+			    work[iwork], &i__2, &ierr);
+		}
+		iwork = ie + *m;
+		nru = 0;
+		if (wntuo || wntuas) {
+		    nru = *m;
+		}
+
+/*              Perform bidiagonal QR iteration, computing left singular */
+/*              vectors of A in A if desired */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, &
+			c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], 
+			info);
+
+/*              If left singular vectors desired in U, copy them there */
+
+		if (wntuas) {
+		    dlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		}
+
+	    } else if (wntvo && wntun) {
+
+/*              Path 2t (N much larger than M, JOBU='N', JOBVT='O') */
+/*              M right singular vectors to be overwritten on A and */
+/*              no left singular vectors to be computed */
+
+/* Computing MAX */
+		i__2 = *m << 2;
+		if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__2 = wrkbl, i__3 = *lda * *n + *m;
+		    if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
+
+			ldwrku = *lda;
+			chunk = *n;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__2 = wrkbl, i__3 = *lda * *n + *m;
+			if (*lwork >= f2cmax(i__2,i__3) + *m * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    chunk = *n;
+			    ldwrkr = *m;
+			} else {
+
+/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    chunk = (*lwork - *m * *m - *m) / *m;
+			    ldwrkr = *m;
+			}
+		    }
+		    itau = ir + ldwrkr * *m;
+		    iwork = itau + *m;
+
+/*                 Compute A=L*Q */
+/*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy L to WORK(IR) and zero out above it */
+
+		    dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr);
+		    i__2 = *m - 1;
+		    i__3 = *m - 1;
+		    dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+			    ldwrkr], &ldwrkr);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize L in WORK(IR) */
+/*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+
+/*                 Generate right vectors bidiagonalizing L */
+/*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
+			    work[iwork], &i__2, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing right */
+/*                 singular vectors of L in WORK(IR) */
+/*                 (Workspace: need M*M + BDSPAC) */
+
+		    dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[
+			    ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork]
+			    , info);
+		    iu = ie + *m;
+
+/*                 Multiply right singular vectors of L in WORK(IR) by Q */
+/*                 in A, storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M) */
+
+		    i__2 = *n;
+		    i__3 = chunk;
+		    for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__3) {
+/* Computing MIN */
+			i__4 = *n - i__ + 1;
+			blk = f2cmin(i__4,chunk);
+			dgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
+				ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
+				work[iu], &ldwrku);
+			dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
+				a_dim1 + 1], lda);
+/* L30: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    ie = 1;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize A */
+/*                 (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__3, &ierr);
+
+/*                 Generate right vectors bidiagonalizing A */
+/*                 (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[iwork], &i__3, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing right */
+/*                 singular vectors of A in A */
+/*                 (Workspace: need BDSPAC) */
+
+		    dbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[
+			    a_offset], lda, dum, &c__1, dum, &c__1, &work[
+			    iwork], info);
+
+		}
+
+	    } else if (wntvo && wntuas) {
+
+/*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */
+/*              M right singular vectors to be overwritten on A and */
+/*              M left singular vectors to be computed in U */
+
+/* Computing MAX */
+		i__3 = *m << 2;
+		if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) {
+
+/*                 Sufficient workspace for a fast algorithm */
+
+		    ir = 1;
+/* Computing MAX */
+		    i__3 = wrkbl, i__2 = *lda * *n + *m;
+		    if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
+
+			ldwrku = *lda;
+			chunk = *n;
+			ldwrkr = *lda;
+		    } else /* if(complicated condition) */ {
+/* Computing MAX */
+			i__3 = wrkbl, i__2 = *lda * *n + *m;
+			if (*lwork >= f2cmax(i__3,i__2) + *m * *m) {
+
+/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    chunk = *n;
+			    ldwrkr = *m;
+			} else {
+
+/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    chunk = (*lwork - *m * *m - *m) / *m;
+			    ldwrkr = *m;
+			}
+		    }
+		    itau = ir + ldwrkr * *m;
+		    iwork = itau + *m;
+
+/*                 Compute A=L*Q */
+/*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__3, &ierr);
+
+/*                 Copy L to U, zeroing out above it */
+
+		    dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		    i__3 = *m - 1;
+		    i__2 = *m - 1;
+		    dlaset_("U", &i__3, &i__2, &c_b57, &c_b57, &u[(u_dim1 << 
+			    1) + 1], ldu);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__3, &ierr);
+		    ie = itau;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize L in U, copying result to WORK(IR) */
+/*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__3, &ierr);
+		    dlacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr);
+
+/*                 Generate right vectors bidiagonalizing L in WORK(IR) */
+/*                 (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
+			    work[iwork], &i__3, &ierr);
+
+/*                 Generate left vectors bidiagonalizing L in U */
+/*                 (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */
+
+		    i__3 = *lwork - iwork + 1;
+		    dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
+			    work[iwork], &i__3, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of L in U, and computing right */
+/*                 singular vectors of L in WORK(IR) */
+/*                 (Workspace: need M*M + BDSPAC) */
+
+		    dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], 
+			    &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[
+			    iwork], info);
+		    iu = ie + *m;
+
+/*                 Multiply right singular vectors of L in WORK(IR) by Q */
+/*                 in A, storing result in WORK(IU) and copying to A */
+/*                 (Workspace: need M*M + 2*M, prefer M*M + M*N + M) */
+
+		    i__3 = *n;
+		    i__2 = chunk;
+		    for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
+			     i__2) {
+/* Computing MIN */
+			i__4 = *n - i__ + 1;
+			blk = f2cmin(i__4,chunk);
+			dgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
+				ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
+				work[iu], &ldwrku);
+			dlacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
+				a_dim1 + 1], lda);
+/* L40: */
+		    }
+
+		} else {
+
+/*                 Insufficient workspace for a fast algorithm */
+
+		    itau = 1;
+		    iwork = itau + *m;
+
+/*                 Compute A=L*Q */
+/*                 (Workspace: need 2*M, prefer M + M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
+			    , &i__2, &ierr);
+
+/*                 Copy L to U, zeroing out above it */
+
+		    dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		    i__2 = *m - 1;
+		    i__3 = *m - 1;
+		    dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 << 
+			    1) + 1], ldu);
+
+/*                 Generate Q in A */
+/*                 (Workspace: need 2*M, prefer M + M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
+			    iwork], &i__2, &ierr);
+		    ie = itau;
+		    itauq = ie + *m;
+		    itaup = itauq + *m;
+		    iwork = itaup + *m;
+
+/*                 Bidiagonalize L in U */
+/*                 (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
+			    itauq], &work[itaup], &work[iwork], &i__2, &ierr);
+
+/*                 Multiply right vectors bidiagonalizing L by Q in A */
+/*                 (Workspace: need 3*M + N, prefer 3*M + N*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[
+			    itaup], &a[a_offset], lda, &work[iwork], &i__2, &
+			    ierr);
+
+/*                 Generate left vectors bidiagonalizing L in U */
+/*                 (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+		    i__2 = *lwork - iwork + 1;
+		    dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
+			    work[iwork], &i__2, &ierr);
+		    iwork = ie + *m;
+
+/*                 Perform bidiagonal QR iteration, computing left */
+/*                 singular vectors of A in U and computing right */
+/*                 singular vectors of A in A */
+/*                 (Workspace: need BDSPAC) */
+
+		    dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[
+			    a_offset], lda, &u[u_offset], ldu, dum, &c__1, &
+			    work[iwork], info);
+
+		}
+
+	    } else if (wntvs) {
+
+		if (wntun) {
+
+/*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
+/*                 M right singular vectors to be computed in VT and */
+/*                 no left singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IR) is LDA by M */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is M by M */
+
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IR), zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				ldwrkr], &ldwrkr);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IR) */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate right vectors bidiagonalizing L in */
+/*                    WORK(IR) */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of L in WORK(IR) */
+/*                    (Workspace: need M*M + BDSPAC) */
+
+			dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
+				work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IR) by */
+/*                    Q in A, storing result in VT */
+/*                    (Workspace: need M*M) */
+
+			dgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, 
+				&a[a_offset], lda, &c_b57, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy result to VT */
+
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right vectors bidiagonalizing L by Q in VT */
+/*                    (Workspace: need 3*M + N, prefer 3*M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
+				vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntuo) {
+
+/*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
+/*                 M right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *m) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			} else {
+
+/*                       WORK(IU) is M by M and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*M*M + 4*M, */
+/*                                prefer 2*M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*M*M + 4*M-1, */
+/*                                prefer 2*M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in WORK(IR) and computing */
+/*                    right singular vectors of L in WORK(IU) */
+/*                    (Workspace: need 2*M*M + BDSPAC) */
+
+			dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in A, storing result in VT */
+/*                    (Workspace: need M*M) */
+
+			dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&a[a_offset], lda, &c_b57, &vt[vt_offset], 
+				ldvt);
+
+/*                    Copy left singular vectors of L to A */
+/*                    (Workspace: need M*M) */
+
+			dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right vectors bidiagonalizing L by Q in VT */
+/*                    (Workspace: need 3*M + N, prefer 3*M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors of L in A */
+/*                    (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, compute left */
+/*                    singular vectors of A in A and compute right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &a[a_offset], lda, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		} else if (wntuas) {
+
+/*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
+/*                         JOBVT='S') */
+/*                 M right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be computed in U */
+
+/* Computing MAX */
+		    i__2 = *m << 2;
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IU) is LDA by M */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is M by M */
+
+			    ldwrku = *m;
+			}
+			itau = iu + ldwrku * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q */
+/*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+
+/*                    Generate Q in A */
+/*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to U */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
+				ldu);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need M*M + 4*M-1, */
+/*                                prefer M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in U and computing right */
+/*                    singular vectors of L in WORK(IU) */
+/*                    (Workspace: need M*M + BDSPAC) */
+
+			dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in A, storing result in VT */
+/*                    (Workspace: need M*M) */
+
+			dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&a[a_offset], lda, &c_b57, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to U, zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 
+				<< 1) + 1], ldu);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in U */
+/*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in U by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M + N, prefer 3*M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    } else if (wntva) {
+
+		if (wntun) {
+
+/*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
+/*                 N right singular vectors to be computed in VT and */
+/*                 no left singular vectors to be computed */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			ir = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IR) is LDA by M */
+
+			    ldwrkr = *lda;
+			} else {
+
+/*                       WORK(IR) is M by M */
+
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Copy L to WORK(IR), zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
+				ldwrkr);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
+				ldwrkr], &ldwrkr);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IR) */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need M*M + 4*M-1, */
+/*                                prefer M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of L in WORK(IR) */
+/*                    (Workspace: need M*M + BDSPAC) */
+
+			dbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
+				work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IR) by */
+/*                    Q in VT, storing result in A */
+/*                    (Workspace: need M*M) */
+
+			dgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, 
+				&vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
+				lda);
+
+/*                    Copy right singular vectors of A from A to VT */
+
+			dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M + N, prefer M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in A by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M + N, prefer 3*M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
+				vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
+				work[iwork], info);
+
+		    }
+
+		} else if (wntuo) {
+
+/*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
+/*                 N right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be overwritten on A */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + (*lda << 1) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *lda;
+			} else if (*lwork >= wrkbl + (*lda + *m) * *m) {
+
+/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
+
+			    ldwrku = *lda;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			} else {
+
+/*                       WORK(IU) is M by M and WORK(IR) is M by M */
+
+			    ldwrku = *m;
+			    ir = iu + ldwrku * *m;
+			    ldwrkr = *m;
+			}
+			itau = ir + ldwrkr * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to */
+/*                    WORK(IR) */
+/*                    (Workspace: need 2*M*M + 4*M, */
+/*                                prefer 2*M*M+3*M+2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
+				ldwrkr);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need 2*M*M + 4*M-1, */
+/*                                prefer 2*M*M+3*M+(M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in WORK(IR) */
+/*                    (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
+				, &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in WORK(IR) and computing */
+/*                    right singular vectors of L in WORK(IU) */
+/*                    (Workspace: need 2*M*M + BDSPAC) */
+
+			dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
+				&work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in VT, storing result in A */
+/*                    (Workspace: need M*M) */
+
+			dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
+				lda);
+
+/*                    Copy right singular vectors of A from A to VT */
+
+			dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Copy left singular vectors of A from WORK(IR) to A */
+
+			dlacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
+				lda);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M + N, prefer M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Zero out above L in A */
+
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
+				<< 1) + 1], lda);
+
+/*                    Bidiagonalize L in A */
+/*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in A by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M + N, prefer 3*M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in A */
+/*                    (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in A and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &a[a_offset], lda, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		} else if (wntuas) {
+
+/*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
+/*                         JOBVT='A') */
+/*                 N right singular vectors to be computed in VT and */
+/*                 M left singular vectors to be computed in U */
+
+/* Computing MAX */
+		    i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
+		    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
+
+/*                    Sufficient workspace for a fast algorithm */
+
+			iu = 1;
+			if (*lwork >= wrkbl + *lda * *m) {
+
+/*                       WORK(IU) is LDA by M */
+
+			    ldwrku = *lda;
+			} else {
+
+/*                       WORK(IU) is M by M */
+
+			    ldwrku = *m;
+			}
+			itau = iu + ldwrku * *m;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M*M + M + N, prefer M*M + M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to WORK(IU), zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
+				ldwrku);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
+				ldwrku], &ldwrku);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in WORK(IU), copying result to U */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+			dlacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
+				ldu);
+
+/*                    Generate right bidiagonalizing vectors in WORK(IU) */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
+				, &work[iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of L in U and computing right */
+/*                    singular vectors of L in WORK(IU) */
+/*                    (Workspace: need M*M + BDSPAC) */
+
+			dbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
+				iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
+				work[iwork], info);
+
+/*                    Multiply right singular vectors of L in WORK(IU) by */
+/*                    Q in VT, storing result in A */
+/*                    (Workspace: need M*M) */
+
+			dgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
+				&vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
+				lda);
+
+/*                    Copy right singular vectors of A from A to VT */
+
+			dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+		    } else {
+
+/*                    Insufficient workspace for a fast algorithm */
+
+			itau = 1;
+			iwork = itau + *m;
+
+/*                    Compute A=L*Q, copying result to VT */
+/*                    (Workspace: need 2*M, prefer M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
+				iwork], &i__2, &ierr);
+			dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
+				ldvt);
+
+/*                    Generate Q in VT */
+/*                    (Workspace: need M + N, prefer M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
+				work[iwork], &i__2, &ierr);
+
+/*                    Copy L to U, zeroing out above it */
+
+			dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
+				ldu);
+			i__2 = *m - 1;
+			i__3 = *m - 1;
+			dlaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 
+				<< 1) + 1], ldu);
+			ie = itau;
+			itauq = ie + *m;
+			itaup = itauq + *m;
+			iwork = itaup + *m;
+
+/*                    Bidiagonalize L in U */
+/*                    (Workspace: need 4*M, prefer 3*M + 2*M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
+				work[itauq], &work[itaup], &work[iwork], &
+				i__2, &ierr);
+
+/*                    Multiply right bidiagonalizing vectors in U by Q */
+/*                    in VT */
+/*                    (Workspace: need 3*M + N, prefer 3*M + N*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
+				work[itaup], &vt[vt_offset], ldvt, &work[
+				iwork], &i__2, &ierr);
+
+/*                    Generate left bidiagonalizing vectors in U */
+/*                    (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+			i__2 = *lwork - iwork + 1;
+			dorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
+				 &work[iwork], &i__2, &ierr);
+			iwork = ie + *m;
+
+/*                    Perform bidiagonal QR iteration, computing left */
+/*                    singular vectors of A in U and computing right */
+/*                    singular vectors of A in VT */
+/*                    (Workspace: need BDSPAC) */
+
+			dbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
+				vt_offset], ldvt, &u[u_offset], ldu, dum, &
+				c__1, &work[iwork], info);
+
+		    }
+
+		}
+
+	    }
+
+	} else {
+
+/*           N .LT. MNTHR */
+
+/*           Path 10t(N greater than M, but not much larger) */
+/*           Reduce to bidiagonal form without LQ decomposition */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    iwork = itaup + *m;
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) */
+
+	    i__2 = *lwork - iwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[iwork], &i__2, &ierr);
+	    if (wntuas) {
+
+/*              If left singular vectors desired in U, copy result to U */
+/*              and generate left bidiagonalizing vectors in U */
+/*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) */
+
+		dlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    if (wntvas) {
+
+/*              If right singular vectors desired in VT, copy result to */
+/*              VT and generate right bidiagonalizing vectors in VT */
+/*              (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB) */
+
+		dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		if (wntva) {
+		    nrvt = *n;
+		}
+		if (wntvs) {
+		    nrvt = *m;
+		}
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], 
+			&work[iwork], &i__2, &ierr);
+	    }
+	    if (wntuo) {
+
+/*              If left singular vectors desired in A, generate left */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    if (wntvo) {
+
+/*              If right singular vectors desired in A, generate right */
+/*              bidiagonalizing vectors in A */
+/*              (Workspace: need 4*M, prefer 3*M + M*NB) */
+
+		i__2 = *lwork - iwork + 1;
+		dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
+			iwork], &i__2, &ierr);
+	    }
+	    iwork = ie + *m;
+	    if (wntuas || wntuo) {
+		nru = *m;
+	    }
+	    if (wntun) {
+		nru = 0;
+	    }
+	    if (wntvas || wntvo) {
+		ncvt = *n;
+	    }
+	    if (wntvn) {
+		ncvt = 0;
+	    }
+	    if (! wntuo && ! wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
+			work[iwork], info);
+	    } else if (! wntuo && wntvo) {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in U and computing right singular */
+/*              vectors in A */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
+			a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
+			iwork], info);
+	    } else {
+
+/*              Perform bidiagonal QR iteration, if desired, computing */
+/*              left singular vectors in A and computing right singular */
+/*              vectors in VT */
+/*              (Workspace: need BDSPAC) */
+
+		dbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
+			vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
+			work[iwork], info);
+	    }
+
+	}
+
+    }
+
+/*     If DBDSQR failed to converge, copy unconverged superdiagonals */
+/*     to WORK( 2:MINMN ) */
+
+    if (*info != 0) {
+	if (ie > 2) {
+	    i__2 = minmn - 1;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__ + 1] = work[i__ + ie - 1];
+/* L50: */
+	    }
+	}
+	if (ie < 2) {
+	    for (i__ = minmn - 1; i__ >= 1; --i__) {
+		work[i__ + 1] = work[i__ + ie - 1];
+/* L60: */
+	    }
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (*info != 0 && anrm > bignum) {
+	    i__2 = minmn - 1;
+	    dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2],
+		     &minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (*info != 0 && anrm < smlnum) {
+	    i__2 = minmn - 1;
+	    dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2],
+		     &minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (doublereal) maxwrk;
+
+    return 0;
+
+/*     End of DGESVD */
+
+} /* dgesvd_ */
+
diff --git a/lapack-netlib/SRC/dgesvdq.c b/lapack-netlib/SRC/dgesvdq.c
new file mode 100644
index 000000000..f85b915d3
--- /dev/null
+++ b/lapack-netlib/SRC/dgesvdq.c
@@ -0,0 +1,2128 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* integer type used throughout the translated code in this file */
+typedef unsigned int uinteger; /* unsigned counterpart; used by the bit_clear/bit_set macros */
+typedef char *address;
+typedef short int shortint;
+typedef float real; /* single-precision real */
+typedef double doublereal; /* double-precision real */
+typedef struct { real r, i; } complex; /* single-precision complex stored as (real, imaginary) */
+typedef struct { doublereal r, i; } doublecomplex; /* double-precision complex stored as (real, imaginary) */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* read *z as a C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* read *z as a C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* reinterpret the struct storage as a C99 complex (pointer cast) */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* reinterpret the struct storage as a C99 complex (pointer cast) */
+#define pCf(z) (*_pCf(z)) /* assignable C99-complex lvalue aliasing *z */
+#define pCd(z) (*_pCd(z)) /* assignable C99-complex lvalue aliasing *z */
+typedef int logical; /* holds TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+/* Truth values stored in the logical types above. */
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain "extern" unless defined beforehand */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag; /* small integer flag used by the descriptor structs below */
+typedef int ftnlen; /* length type used for the character-buffer fields below */
+typedef int ftnint; /* integer type used for unit/record style fields below */
+/* Descriptor structs from the f2c header; none is referenced in the visible part of this file. */
+/* external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit; /* presumably the unit number -- TODO confirm against libf2c */
+	flag ciend;
+	char *cifmt; /* presumably the format string -- TODO confirm against libf2c */
+	ftnint cirec;
+} cilist;
+
+/* internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit; /* a char pointer here, where cilist has an integer ciunit */
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen; /* presumably the length of ofnm -- TODO confirm */
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire: mostly pointer fields, each string pointer followed by a length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen; /* NOTE(review): ftnint here, while the sibling length fields use ftnlen (same underlying int) */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+/* Union with one member for each scalar type declared earlier in this header. */
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+/* Descriptor for one named variable; Namelist below holds pointers to these. */
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+/* A named group of variable descriptors. */
+struct Namelist {
+	char *name;
+	Vardesc **vars; /* presumably nvars entries -- TODO confirm */
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* NOTE: evaluates x twice; takes over calls to abs() for the rest of the file */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* NOTE: evaluates the selected argument twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* NOTE: evaluates the selected argument twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+/* Intrinsic shims: the x_name(p) forms take pointers and dereference them; c_/z_ forms go through Cf/Cd and pCf/pCd. */
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* computed with double-precision ccos, narrowed on assignment */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* uses double-precision conj, narrowed on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) /* positive difference, else 0.0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* uses double-precision cimag on a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncate toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncate toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* round half away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| carrying the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is dereferenced directly while A goes through Cd(); confirm B's type at call sites */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars, stops at NUL, adds no padding */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* version tag string; not referenced in the visible part of this file */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break; /* expands to a statement, including the semicolon */
+#define mycycle() continue; /* expands to a statement, including the semicolon */
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a braced expression, not a plain expression */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): expands to a braced expression; w is ignored */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* always the double-precision dmaxloc_; braced like myceiling */
+
+/* procedure parameter types for -A and -C++ */
+/* L_fp: pointer to a function returning logical; the argument list is left unspecified in both branches. */
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n for integer n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float acc = 1.0; /* also the result when n == 0 */
+	unsigned long int bits = n; /* exponent magnitude, consumed low bit first */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; } /* |n| in unsigned arithmetic: no signed overflow for the most negative n */
+	while (bits != 0) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x; /* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* x**n for integer n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double acc = 1.0; /* also the result when n == 0 */
+	unsigned long int bits = n; /* exponent magnitude, consumed low bit first */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; } /* |n| in unsigned arithmetic: no signed overflow for the most negative n */
+	while (bits != 0) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x; /* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Complex x**n for integer n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float acc = 1.0; /* also the result when n == 0 */
+	unsigned long int bits = n; /* exponent magnitude, consumed low bit first */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; } /* |n| in unsigned arithmetic: no signed overflow for the most negative n */
+	while (bits != 0) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x; /* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Complex x**n for integer n by square-and-multiply; a negative n yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double acc = 1.0; /* also the result when n == 0 */
+	unsigned long int bits = n; /* exponent magnitude, consumed low bit first */
+	if (n < 0) { bits = 0UL - bits; x = 1/x; } /* |n| in unsigned arithmetic: no signed overflow for the most negative n */
+	while (bits != 0) {
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x; /* no squaring once the last bit is consumed */
+	}
+	return acc;
+}
+/* Integer power x**n; for n < 0 the quotient is truncated, so only x == 1 and x == -1 give a nonzero result. */
+static integer pow_ii(integer x, integer n) {
+	integer acc = 1; unsigned long int bits = n;
+	if (n == 0 || x == 1) return 1; /* x**0 and 1**n */
+	if (n < 0) {
+		/* x == 0 still evaluates 1/0 (integer division by zero), exactly as the original expression did */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		bits = 0UL - bits; /* |n| in unsigned arithmetic: no signed overflow for the most negative n */
+	}
+	for (;;) { /* square-and-multiply over the bits of |n| */
+		if (bits & 01) acc *= x;
+		bits >>= 1;
+		if (bits == 0) break;
+		x *= x;
+	}
+	return acc;
+}
+/* Position (1-based, counted from s) of the first maximum among w[s-1] .. w[e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer best = s, k = s;
+	double top = w[s-1];
+	while (++k <= e)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first maximum among w[s-1] .. w[e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer best = s, k = s;
+	float top = w[s-1];
+	while (++k <= e)
+		if (w[k-1] > top) { top = w[k-1]; best = k; }
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n elements with strides incx and incy. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards, starting at offset (1-n)*inc */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc; /* n <= 0 stores zero */
+}
+/* Conjugated dot product: *z = sum over i of conj(x(i)) * y(i), for n elements with strides incx and incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards, starting at offset (1-n)*inc */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc; /* n <= 0 stores zero */
+}
+/* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n elements with strides incx and incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards, starting at offset (1-n)*inc */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc; /* n <= 0 stores zero */
+}
+/* Unconjugated dot product: *z = sum over i of x(i) * y(i), for n elements with strides incx and incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment walks the vector backwards, starting at offset (1-n)*inc */
+	integer ix = incx < 0 ? (1 - n) * incx : 0;
+	integer iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc; /* n <= 0 stores zero */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+/* File-scope copies of literals; presumably passed by address at call sites further down -- TODO confirm. */
+static integer c_n1 = -1;
+static integer c__1 = 1;
+static doublereal c_b72 = 0.;
+static doublereal c_b76 = 1.;
+static integer c__0 = 0;
+static logical c_false = FALSE_;
+
+/* > \brief <b> DGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method
+ for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESVDQ + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvdq
+.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvdq
+.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvdq
+.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*      SUBROUTINE DGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, */
+/*                          S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK, */
+/*                          WORK, LWORK, RWORK, LRWORK, INFO ) */
+
+/*      IMPLICIT    NONE */
+/*      CHARACTER   JOBA, JOBP, JOBR, JOBU, JOBV */
+/*      INTEGER     M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK, */
+/*                  INFO */
+/*      DOUBLE PRECISION  A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * ) */
+/*      DOUBLE PRECISION  S( * ), RWORK( * ) */
+/*      INTEGER     IWORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGESVDQ computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, where M >= N. The SVD of A is written as */
+/* >                                    [++]   [xx]   [x0]   [xx] */
+/* >              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx] */
+/* >                                    [++]   [xx] */
+/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */
+/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */
+/* > of SIGMA are the singular values of A. The columns of U and V are the */
+/* > left and the right singular vectors of A, respectively. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBA */
+/* > \verbatim */
+/* >  JOBA is CHARACTER*1 */
+/* >  Specifies the level of accuracy in the computed SVD */
+/* >  = 'A' The requested accuracy corresponds to having the backward */
+/* >        error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F, */
+/* >        where EPS = DLAMCH('Epsilon'). This authorises DGESVDQ to */
+/* >        truncate the computed triangular factor in a rank revealing */
+/* >        QR factorization whenever the truncated part is below the */
+/* >        threshold of the order of EPS * ||A||_F. This is aggressive */
+/* >        truncation level. */
+/* >  = 'M' Similarly as with 'A', but the truncation is more gentle: it */
+/* >        is allowed only when there is a drop on the diagonal of the */
+/* >        triangular factor in the QR factorization. This is medium */
+/* >        truncation level. */
+/* >  = 'H' High accuracy requested. No numerical rank determination based */
+/* >        on the rank revealing QR factorization is attempted. */
+/* >  = 'E' Same as 'H', and in addition the condition number of column */
+/* >        scaled A is estimated and returned in  RWORK(1). */
+/* >        N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBP */
+/* > \verbatim */
+/* >  JOBP is CHARACTER*1 */
+/* >  = 'P' The rows of A are ordered in decreasing order with respect to */
+/* >        ||A(i,:)||_\infty. This enhances numerical accuracy at the cost */
+/* >        of extra data movement. Recommended for numerical robustness. */
+/* >  = 'N' No row pivoting. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBR */
+/* > \verbatim */
+/* >          JOBR is CHARACTER*1 */
+/* >          = 'T' After the initial pivoted QR factorization, DGESVD is applied to */
+/* >          the transposed R**T of the computed triangular factor R. This involves */
+/* >          some extra data movement (matrix transpositions). Useful for */
+/* >          experiments, research and development. */
+/* >          = 'N' The triangular factor R is given as input to DGESVD. This may be */
+/* >          preferred as it involves less data movement. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          = 'A' All M left singular vectors are computed and returned in the */
+/* >          matrix U. See the description of U. */
+/* >          = 'S' or 'U' N = f2cmin(M,N) left singular vectors are computed and returned */
+/* >          in the matrix U. See the description of U. */
+/* >          = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular */
+/* >          vectors are computed and returned in the matrix U. */
+/* >          = 'F' The N left singular vectors are returned in factored form as the */
+/* >          product of the Q factor from the initial QR factorization and the */
+/* >          N left singular vectors of (R**T , 0)**T. If row pivoting is used, */
+/* >          then the necessary information on the row pivoting is stored in */
+/* >          IWORK(N+1:N+M-1). */
+/* >          = 'N' The left singular vectors are not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          = 'A', 'V' All N right singular vectors are computed and returned in */
+/* >          the matrix V. */
+/* >          = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular */
+/* >          vectors are computed and returned in the matrix V. This option is */
+/* >          allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal. */
+/* >          = 'N' The right singular vectors are not computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array of dimensions LDA x N */
+/* >          On entry, the input matrix A. */
+/* >          On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains */
+/* >          the Householder vectors as stored by DGEQP3. If JOBU = 'F', these Householder */
+/* >          vectors together with WORK(1:N) can be used to restore the Q factors from */
+/* >          the initial pivoted QR factorization of A. See the description of U. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER. */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is DOUBLE PRECISION array of dimension N. */
+/* >          The singular values of A, ordered so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is DOUBLE PRECISION array, dimension */
+/* >          LDU x M if JOBU = 'A'; see the description of LDU. In this case, */
+/* >          on exit, U contains the M left singular vectors. */
+/* >          LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this */
+/* >          case, U contains the leading N or the leading NUMRANK left singular vectors. */
+/* >          LDU x N if JOBU = 'F' ; see the description of LDU. In this case U */
+/* >          contains N x N orthogonal matrix that can be used to form the left */
+/* >          singular vectors. */
+/* >          If JOBU = 'N', U is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER. */
+/* >          The leading dimension of the array U. */
+/* >          If JOBU = 'A', 'S', 'U', 'R',  LDU >= f2cmax(1,M). */
+/* >          If JOBU = 'F',                 LDU >= f2cmax(1,N). */
+/* >          Otherwise,                     LDU >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] V */
+/* > \verbatim */
+/* >          V is DOUBLE PRECISION array, dimension */
+/* >          LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' . */
+/* >          If JOBV = 'A', or 'V',  V contains the N-by-N orthogonal matrix  V**T; */
+/* >          If JOBV = 'R', V contains the first NUMRANK rows of V**T (the right */
+/* >          singular vectors, stored rowwise, of the NUMRANK largest singular values). */
+/* >          If JOBV = 'N' and JOBA = 'E', V is used as a workspace. */
+/* >          If JOBV = 'N', and JOBA.NE.'E', V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V. */
+/* >          If JOBV = 'A', 'V', 'R',  or JOBA = 'E', LDV >= f2cmax(1,N). */
+/* >          Otherwise,                               LDV >= 1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] NUMRANK */
+/* > \verbatim */
+/* >          NUMRANK is INTEGER */
+/* >          NUMRANK is the numerical rank first determined after the rank */
+/* >          revealing QR factorization, following the strategy specified by the */
+/* >          value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK */
+/* >          leading singular values and vectors are then requested in the call */
+/* >          of DGESVD. The final value of NUMRANK might be further reduced if */
+/* >          some singular values are computed as zeros. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (f2cmax(1, LIWORK)). */
+/* >          On exit, IWORK(1:N) contains column pivoting permutation of the */
+/* >          rank revealing QR factorization. */
+/* >          If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence */
+/* >          of row swaps used in row pivoting. These can be used to restore the */
+/* >          left singular vectors in the case JOBU = 'F'. */
+/* > */
+/* >          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */
+/* >          IWORK(1) returns the minimal LIWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LIWORK */
+/* > \verbatim */
+/* >          LIWORK is INTEGER */
+/* >          The dimension of the array IWORK. */
+/* >          LIWORK >= N + M - 1,     if JOBP = 'P' and JOBA .NE. 'E'; */
+/* >          LIWORK >= N              if JOBP = 'N' and JOBA .NE. 'E'; */
+/* >          LIWORK >= N + M - 1 + N, if JOBP = 'P' and JOBA = 'E'; */
+/* >          LIWORK >= N + N          if JOBP = 'N' and JOBA = 'E'. */
+
+/* >          If LIWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates and returns the optimal and minimal sizes */
+/* >          for the WORK, IWORK, and RWORK arrays, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (f2cmax(2, LWORK)), used as a workspace. */
+/* >          On exit, if, on entry, LWORK.NE.-1, WORK(1:N) contains parameters */
+/* >          needed to recover the Q factor from the QR factorization computed by */
+/* >          DGEQP3. */
+/* > */
+/* >          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */
+/* >          WORK(1) returns the optimal LWORK, and */
+/* >          WORK(2) returns the minimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. It is determined as follows: */
+/* >          Let  LWQP3 = 3*N+1,  LWCON = 3*N, and let */
+/* >          LWORQ = { MAX( N, 1 ),  if JOBU = 'R', 'S', or 'U' */
+/* >                  { MAX( M, 1 ),  if JOBU = 'A' */
+/* >          LWSVD = MAX( 5*N, 1 ) */
+/* >          LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 5*(N/2), 1 ), LWORLQ = MAX( N, 1 ), */
+/* >          LWQRF = MAX( N/2, 1 ), LWORQ2 = MAX( N, 1 ) */
+/* >          Then the minimal value of LWORK is: */
+/* >          = MAX( N + LWQP3, LWSVD )        if only the singular values are needed; */
+/* >          = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed, */
+/* >                                   and a scaled condition estimate requested; */
+/* > */
+/* >          = N + MAX( LWQP3, LWSVD, LWORQ ) if the singular values and the left */
+/* >                                   singular vectors are requested; */
+/* >          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the singular values and the left */
+/* >                                   singular vectors are requested, and also */
+/* >                                   a scaled condition estimate requested; */
+/* > */
+/* >          = N + MAX( LWQP3, LWSVD )        if the singular values and the right */
+/* >                                   singular vectors are requested; */
+/* >          = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right */
+/* >                                   singular vectors are requested, and also */
+/* >                                   a scaled condition estimate requested; */
+/* > */
+/* >          = N + MAX( LWQP3, LWSVD, LWORQ ) if the full SVD is requested with JOBV = 'R'; */
+/* >                                   independent of JOBR; */
+/* >          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the full SVD is requested, */
+/* >                                   JOBV = 'R' and, also a scaled condition */
+/* >                                   estimate requested; independent of JOBR; */
+/* >          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ) ) if the */
+/* >                         full SVD is requested with JOBV = 'A' or 'V', and */
+/* >                         JOBR ='N' */
+/* >          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ ) ) */
+/* >                         if the full SVD is requested with JOBV = 'A' or 'V', and */
+/* >                         JOBR ='N', and also a scaled condition number estimate */
+/* >                         requested. */
+/* >          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) if the */
+/* >                         full SVD is requested with JOBV = 'A', 'V', and JOBR ='T' */
+/* >          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */
+/* >         N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) */
+/* >                         if the full SVD is requested with JOBV = 'A' or 'V', and */
+/* >                         JOBR ='T', and also a scaled condition number estimate */
+/* >                         requested. */
+/* >          Finally, LWORK must be at least two: LWORK = MAX( 2, LWORK ). */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates and returns the optimal and minimal sizes */
+/* >          for the WORK, IWORK, and RWORK arrays, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RWORK */
+/* > \verbatim */
+/* >          RWORK is DOUBLE PRECISION array, dimension (f2cmax(1, LRWORK)). */
+/* >          On exit, */
+/* >          1. If JOBA = 'E', RWORK(1) contains an estimate of the condition */
+/* >          number of column scaled A. If A = C * D where D is diagonal and C */
+/* >          has unit columns in the Euclidean norm, then, assuming full column rank, */
+/* >          N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1). */
+/* >          Otherwise, RWORK(1) = -1. */
+/* >          2. RWORK(2) contains the number of singular values computed as */
+/* >          exact zeros in DGESVD applied to the upper triangular or trapezoidal */
+/* >          R (from the initial QR factorization). In case of early exit (no call to */
+/* >          DGESVD, such as in the case of zero matrix) RWORK(2) = -1. */
+/* > */
+/* >          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */
+/* >          RWORK(1) returns the minimal LRWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LRWORK */
+/* > \verbatim */
+/* >          LRWORK is INTEGER. */
+/* >          The dimension of the array RWORK. */
+/* >          If JOBP ='P', then LRWORK >= MAX(2, M). */
+/* >          Otherwise, LRWORK >= 2 */
+
+/* >          If LRWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates and returns the optimal and minimal sizes */
+/* >          for the WORK, IWORK, and RWORK arrays, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
+/* >          > 0:  if DBDSQR did not converge, INFO specifies how many superdiagonals */
+/* >          of an intermediate bidiagonal form B (computed in DGESVD) did not */
+/* >          converge to zero. */
+/* > \endverbatim */
+
+/* > \par Further Details: */
+/*  ======================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >   1. The data movement (matrix transpose) is coded using simple nested */
+/* >   DO-loops because BLAS and LAPACK do not provide corresponding subroutines. */
+/* >   Those DO-loops are easily identified in this source code - by the CONTINUE */
+/* >   statements labeled with 11**. In an optimized version of this code, the */
+/* >   nested DO loops should be replaced with calls to an optimized subroutine. */
+/* >   2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause */
+/* >   column norm overflow. This is the minimal precaution and it is left to the */
+/* >   SVD routine (DGESVD) to do its own preemptive scaling if potential over- */
+/* >   or underflows are detected. To avoid repeated scanning of the array A, */
+/* >   an optimal implementation would do all necessary scaling before calling */
+/* >   DGESVD and the scaling in DGESVD can be switched off. */
+/* >   3. Other comments related to code optimization are given in comments in the */
+/* >   code, enclosed in [[double brackets]]. */
+/* > \endverbatim */
+
+/* > \par Bugs, examples and comments */
+/*  =========================== */
+
+/* > \verbatim */
+/* >  Please report all bugs and send interesting examples and/or comments to */
+/* >  drmac@math.hr. Thank you. */
+/* > \endverbatim */
+
+/* > \par References */
+/*  =============== */
+
+/* > \verbatim */
+/* >  [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for */
+/* >      Computing the SVD with High Accuracy. ACM Trans. Math. Softw. */
+/* >      44(1): 11:1-11:30 (2017) */
+/* > */
+/* >  SIGMA library, xGESVDQ section updated February 2016. */
+/* >  Developed and coded by Zlatko Drmac, Department of Mathematics */
+/* >  University of Zagreb, Croatia, drmac@math.hr */
+/* > \endverbatim */
+
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* > \verbatim */
+/* > Developed and coded by Zlatko Drmac, Department of Mathematics */
+/* >  University of Zagreb, Croatia, drmac@math.hr */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date November 2018 */
+
+/* > \ingroup doubleGEsing */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgesvdq_(char *joba, char *jobp, char *jobr, char *jobu, 
+	char *jobv, integer *m, integer *n, doublereal *a, integer *lda, 
+	doublereal *s, doublereal *u, integer *ldu, doublereal *v, integer *
+	ldv, integer *numrank, integer *iwork, integer *liwork, doublereal *
+	work, integer *lwork, doublereal *rwork, integer *lrwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2;
+    doublereal d__1, d__2, d__3;
+
+    /* Local variables */
+    integer lwrk_dormqr__, lwrk_dgesvd2__, ierr, lwrk_dormqr2__;
+    doublereal rtmp;
+    integer optratio;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    logical lsvc0, accla;
+    integer lwqp3;
+    logical acclh, acclm;
+    integer p, q;
+    logical conda;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    integer iwoff;
+    logical lsvec;
+    doublereal sfmin, epsln;
+    integer lwcon;
+    logical rsvec;
+    integer lwlqf, lwqrf, n1, lwsvd;
+    logical dntwu, dntwv, wntua;
+    integer lworq;
+    logical wntuf, wntva, wntur, wntus, wntvr;
+    extern /* Subroutine */ int dgeqp3_(integer *, integer *, doublereal *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    integer *);
+    integer lwsvd2, lworq2;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    integer nr;
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    integer *, integer *, doublereal *, integer *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    doublereal sconda;
+    extern /* Subroutine */ int dgeqrf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dgesvd_(char *, char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), 
+	    dlacpy_(char *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *), dlaset_(char *, integer *, 
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen), dlapmt_(logical *, 
+	    integer *, integer *, doublereal *, integer *, integer *), 
+	    dpocon_(char *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, doublereal *, integer *, integer *), 
+	    dlaswp_(integer *, doublereal *, integer *, integer *, integer *, 
+	    integer *, integer *), dormlq_(char *, char *, integer *, integer 
+	    *, integer *, doublereal *, integer *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, integer *), 
+	    dormqr_(char *, char *, integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *);
+    integer minwrk;
+    logical rtrans;
+    doublereal rdummy[1];
+    integer lworlq;
+    logical lquery;
+    integer optwrk;
+    logical rowprm;
+    doublereal big;
+    integer minwrk2;
+    logical ascaled;
+    integer lwrk_dgeqp3__, optwrk2, lwrk_dgelqf__, iminwrk, lwrk_dgeqrf__, 
+	    rminwrk, lwrk_dgesvd__, lwrk_dormlq__;
+
+
+/*  ===================================================================== */
+
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --iwork;
+    --work;
+    --rwork;
+
+    /* Function Body */
+    wntus = lsame_(jobu, "S") || lsame_(jobu, "U");
+    wntur = lsame_(jobu, "R");
+    wntua = lsame_(jobu, "A");
+    wntuf = lsame_(jobu, "F");
+    lsvc0 = wntus || wntur || wntua;
+    lsvec = lsvc0 || wntuf;
+    dntwu = lsame_(jobu, "N");
+
+    wntvr = lsame_(jobv, "R");
+    wntva = lsame_(jobv, "A") || lsame_(jobv, "V");
+    rsvec = wntvr || wntva;
+    dntwv = lsame_(jobv, "N");
+
+    accla = lsame_(joba, "A");
+    acclm = lsame_(joba, "M");
+    conda = lsame_(joba, "E");
+    acclh = lsame_(joba, "H") || conda;
+
+    rowprm = lsame_(jobp, "P");
+    rtrans = lsame_(jobr, "T");
+
+    if (rowprm) {
+	if (conda) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n + *m - 1 + *n;
+	    iminwrk = f2cmax(i__1,i__2);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n + *m - 1;
+	    iminwrk = f2cmax(i__1,i__2);
+	}
+	rminwrk = f2cmax(2,*m);
+    } else {
+	if (conda) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n + *n;
+	    iminwrk = f2cmax(i__1,i__2);
+	} else {
+	    iminwrk = f2cmax(1,*n);
+	}
+	rminwrk = 2;
+    }
+    lquery = *liwork == -1 || *lwork == -1 || *lrwork == -1;
+    *info = 0;
+    if (! (accla || acclm || acclh)) {
+	*info = -1;
+    } else if (! (rowprm || lsame_(jobp, "N"))) {
+	*info = -2;
+    } else if (! (rtrans || lsame_(jobr, "N"))) {
+	*info = -3;
+    } else if (! (lsvec || dntwu)) {
+	*info = -4;
+    } else if (wntur && wntva) {
+	*info = -5;
+    } else if (! (rsvec || dntwv)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (*n < 0 || *n > *m) {
+	*info = -7;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -9;
+    } else if (*ldu < 1 || lsvc0 && *ldu < *m || wntuf && *ldu < *n) {
+	*info = -12;
+    } else if (*ldv < 1 || rsvec && *ldv < *n || conda && *ldv < *n) {
+	*info = -14;
+    } else if (*liwork < iminwrk && ! lquery) {
+	*info = -17;
+    }
+
+
+    if (*info == 0) {
+/*        [[The expressions for computing the minimal and the optimal */
+/*        values of LWORK are written with a lot of redundancy and */
+/*        can be simplified. However, this detailed form is easier for */
+/*        maintenance and modifications of the code.]] */
+
+	lwqp3 = *n * 3 + 1;
+	if (wntus || wntur) {
+	    lworq = f2cmax(*n,1);
+	} else if (wntua) {
+	    lworq = f2cmax(*m,1);
+	}
+	lwcon = *n * 3;
+/* Computing MAX */
+	i__1 = *n * 5;
+	lwsvd = f2cmax(i__1,1);
+	if (lquery) {
+	    dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], rdummy, rdummy, &c_n1,
+		     &ierr);
+	    lwrk_dgeqp3__ = (integer) rdummy[0];
+	    if (wntus || wntur) {
+		dormqr_("L", "N", m, n, n, &a[a_offset], lda, rdummy, &u[
+			u_offset], ldu, rdummy, &c_n1, &ierr);
+		lwrk_dormqr__ = (integer) rdummy[0];
+	    } else if (wntua) {
+		dormqr_("L", "N", m, m, n, &a[a_offset], lda, rdummy, &u[
+			u_offset], ldu, rdummy, &c_n1, &ierr);
+		lwrk_dormqr__ = (integer) rdummy[0];
+	    } else {
+		lwrk_dormqr__ = 0;
+	    }
+	}
+	minwrk = 2;
+	optwrk = 2;
+	if (! (lsvec || rsvec)) {
+/*            only the singular values are requested */
+	    if (conda) {
+/* Computing MAX */
+		i__1 = *n + lwqp3, i__1 = f2cmax(i__1,lwcon);
+		minwrk = f2cmax(i__1,lwsvd);
+	    } else {
+/* Computing MAX */
+		i__1 = *n + lwqp3;
+		minwrk = f2cmax(i__1,lwsvd);
+	    }
+	    if (lquery) {
+		dgesvd_("N", "N", n, n, &a[a_offset], lda, &s[1], &u[u_offset]
+			, ldu, &v[v_offset], ldv, rdummy, &c_n1, &ierr);
+		lwrk_dgesvd__ = (integer) rdummy[0];
+		if (conda) {
+/* Computing MAX */
+		    i__1 = *n + lwrk_dgeqp3__, i__2 = *n + lwcon, i__1 = f2cmax(
+			    i__1,i__2);
+		    optwrk = f2cmax(i__1,lwrk_dgesvd__);
+		} else {
+/* Computing MAX */
+		    i__1 = *n + lwrk_dgeqp3__;
+		    optwrk = f2cmax(i__1,lwrk_dgesvd__);
+		}
+	    }
+	} else if (lsvec && ! rsvec) {
+/*            singular values and the left singular vectors are requested */
+	    if (conda) {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwcon), i__1 = f2cmax(i__1,lwsvd);
+		minwrk = *n + f2cmax(i__1,lworq);
+	    } else {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwsvd);
+		minwrk = *n + f2cmax(i__1,lworq);
+	    }
+	    if (lquery) {
+		if (rtrans) {
+		    dgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		} else {
+		    dgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		}
+		lwrk_dgesvd__ = (integer) rdummy[0];
+		if (conda) {
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_dgeqp3__,lwcon), i__1 = f2cmax(i__1,
+			    lwrk_dgesvd__);
+		    optwrk = *n + f2cmax(i__1,lwrk_dormqr__);
+		} else {
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__);
+		    optwrk = *n + f2cmax(i__1,lwrk_dormqr__);
+		}
+	    }
+	} else if (rsvec && ! lsvec) {
+/*            singular values and the right singular vectors are requested */
+	    if (conda) {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwcon);
+		minwrk = *n + f2cmax(i__1,lwsvd);
+	    } else {
+		minwrk = *n + f2cmax(lwqp3,lwsvd);
+	    }
+	    if (lquery) {
+		if (rtrans) {
+		    dgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		} else {
+		    dgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		}
+		lwrk_dgesvd__ = (integer) rdummy[0];
+		if (conda) {
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_dgeqp3__,lwcon);
+		    optwrk = *n + f2cmax(i__1,lwrk_dgesvd__);
+		} else {
+		    optwrk = *n + f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__);
+		}
+	    }
+	} else {
+/*            full SVD is requested */
+	    if (rtrans) {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwsvd);
+		minwrk = f2cmax(i__1,lworq);
+		if (conda) {
+		    minwrk = f2cmax(minwrk,lwcon);
+		}
+		minwrk += *n;
+		if (wntva) {
+/* Computing MAX */
+		    i__1 = *n / 2;
+		    lwqrf = f2cmax(i__1,1);
+/* Computing MAX */
+		    i__1 = *n / 2 * 5;
+		    lwsvd2 = f2cmax(i__1,1);
+		    lworq2 = f2cmax(*n,1);
+/* Computing MAX */
+		    i__1 = lwqp3, i__2 = *n / 2 + lwqrf, i__1 = f2cmax(i__1,i__2)
+			    , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), 
+			    i__2 = *n / 2 + lworq2, i__1 = f2cmax(i__1,i__2);
+		    minwrk2 = f2cmax(i__1,lworq);
+		    if (conda) {
+			minwrk2 = f2cmax(minwrk2,lwcon);
+		    }
+		    minwrk2 = *n + minwrk2;
+		    minwrk = f2cmax(minwrk,minwrk2);
+		}
+	    } else {
+/* Computing MAX */
+		i__1 = f2cmax(lwqp3,lwsvd);
+		minwrk = f2cmax(i__1,lworq);
+		if (conda) {
+		    minwrk = f2cmax(minwrk,lwcon);
+		}
+		minwrk += *n;
+		if (wntva) {
+/* Computing MAX */
+		    i__1 = *n / 2;
+		    lwlqf = f2cmax(i__1,1);
+/* Computing MAX */
+		    i__1 = *n / 2 * 5;
+		    lwsvd2 = f2cmax(i__1,1);
+		    lworlq = f2cmax(*n,1);
+/* Computing MAX */
+		    i__1 = lwqp3, i__2 = *n / 2 + lwlqf, i__1 = f2cmax(i__1,i__2)
+			    , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), 
+			    i__2 = *n / 2 + lworlq, i__1 = f2cmax(i__1,i__2);
+		    minwrk2 = f2cmax(i__1,lworq);
+		    if (conda) {
+			minwrk2 = f2cmax(minwrk2,lwcon);
+		    }
+		    minwrk2 = *n + minwrk2;
+		    minwrk = f2cmax(minwrk,minwrk2);
+		}
+	    }
+	    if (lquery) {
+		if (rtrans) {
+		    dgesvd_("O", "A", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		    lwrk_dgesvd__ = (integer) rdummy[0];
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__);
+		    optwrk = f2cmax(i__1,lwrk_dormqr__);
+		    if (conda) {
+			optwrk = f2cmax(optwrk,lwcon);
+		    }
+		    optwrk = *n + optwrk;
+		    if (wntva) {
+			i__1 = *n / 2;
+			dgeqrf_(n, &i__1, &u[u_offset], ldu, rdummy, rdummy, &
+				c_n1, &ierr);
+			lwrk_dgeqrf__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			i__2 = *n / 2;
+			dgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[
+				1], &u[u_offset], ldu, &v[v_offset], ldv, 
+				rdummy, &c_n1, &ierr);
+			lwrk_dgesvd2__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			dormqr_("R", "C", n, n, &i__1, &u[u_offset], ldu, 
+				rdummy, &v[v_offset], ldv, rdummy, &c_n1, &
+				ierr);
+			lwrk_dormqr2__ = (integer) rdummy[0];
+/* Computing MAX */
+			i__1 = lwrk_dgeqp3__, i__2 = *n / 2 + lwrk_dgeqrf__, 
+				i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + 
+				lwrk_dgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = 
+				*n / 2 + lwrk_dormqr2__;
+			optwrk2 = f2cmax(i__1,i__2);
+			if (conda) {
+			    optwrk2 = f2cmax(optwrk2,lwcon);
+			}
+			optwrk2 = *n + optwrk2;
+			optwrk = f2cmax(optwrk,optwrk2);
+		    }
+		} else {
+		    dgesvd_("S", "O", n, n, &a[a_offset], lda, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, 
+			    &ierr);
+		    lwrk_dgesvd__ = (integer) rdummy[0];
+/* Computing MAX */
+		    i__1 = f2cmax(lwrk_dgeqp3__,lwrk_dgesvd__);
+		    optwrk = f2cmax(i__1,lwrk_dormqr__);
+		    if (conda) {
+			optwrk = f2cmax(optwrk,lwcon);
+		    }
+		    optwrk = *n + optwrk;
+		    if (wntva) {
+			i__1 = *n / 2;
+			dgelqf_(&i__1, n, &u[u_offset], ldu, rdummy, rdummy, &
+				c_n1, &ierr);
+			lwrk_dgelqf__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			i__2 = *n / 2;
+			dgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[
+				1], &u[u_offset], ldu, &v[v_offset], ldv, 
+				rdummy, &c_n1, &ierr);
+			lwrk_dgesvd2__ = (integer) rdummy[0];
+			i__1 = *n / 2;
+			dormlq_("R", "N", n, n, &i__1, &u[u_offset], ldu, 
+				rdummy, &v[v_offset], ldv, rdummy, &c_n1, &
+				ierr);
+			lwrk_dormlq__ = (integer) rdummy[0];
+/* Computing MAX */
+			i__1 = lwrk_dgeqp3__, i__2 = *n / 2 + lwrk_dgelqf__, 
+				i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + 
+				lwrk_dgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = 
+				*n / 2 + lwrk_dormlq__;
+			optwrk2 = f2cmax(i__1,i__2);
+			if (conda) {
+			    optwrk2 = f2cmax(optwrk2,lwcon);
+			}
+			optwrk2 = *n + optwrk2;
+			optwrk = f2cmax(optwrk,optwrk2);
+		    }
+		}
+	    }
+	}
+
+	minwrk = f2cmax(2,minwrk);
+	optwrk = f2cmax(2,optwrk);
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -19;
+	}
+
+    }
+
+    if (*info == 0 && *lrwork < rminwrk && ! lquery) {
+	*info = -21;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGESVDQ", &i__1, (ftnlen)7);
+	return 0;
+    } else if (lquery) {
+
+/*     Return optimal workspace */
+
+	iwork[1] = iminwrk;
+	work[1] = (doublereal) optwrk;
+	work[2] = (doublereal) minwrk;
+	rwork[1] = (doublereal) rminwrk;
+	return 0;
+    }
+
+/*     Quick return if the matrix is void. */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+    big = dlamch_("O");
+    ascaled = FALSE_;
+    iwoff = 1;
+    if (rowprm) {
+	iwoff = *m;
+/*           ell-infinity norm - this enhances numerical robustness in */
+/*           the case of differently scaled rows. */
+	i__1 = *m;
+	for (p = 1; p <= i__1; ++p) {
+/*               RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) ) */
+/*               [[DLANGE will return NaN if an entry of the p-th row is Nan]] */
+	    rwork[p] = dlange_("M", &c__1, n, &a[p + a_dim1], lda, rdummy);
+	    if (rwork[p] != rwork[p] || rwork[p] * 0. != 0.) {
+		*info = -8;
+		i__2 = -(*info);
+		xerbla_("DGESVDQ", &i__2, (ftnlen)7);
+		return 0;
+	    }
+/* L1904: */
+	}
+	i__1 = *m - 1;
+	for (p = 1; p <= i__1; ++p) {
+	    i__2 = *m - p + 1;
+	    q = idamax_(&i__2, &rwork[p], &c__1) + p - 1;
+	    iwork[*n + p] = q;
+	    if (p != q) {
+		rtmp = rwork[p];
+		rwork[p] = rwork[q];
+		rwork[q] = rtmp;
+	    }
+/* L1952: */
+	}
+
+	if (rwork[1] == 0.) {
+/*              Quick return: A is the M x N zero matrix. */
+	    *numrank = 0;
+	    dlaset_("G", n, &c__1, &c_b72, &c_b72, &s[1], n);
+	    if (wntus) {
+		dlaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu);
+	    }
+	    if (wntua) {
+		dlaset_("G", m, m, &c_b72, &c_b76, &u[u_offset], ldu);
+	    }
+	    if (wntva) {
+		dlaset_("G", n, n, &c_b72, &c_b76, &v[v_offset], ldv);
+	    }
+	    if (wntuf) {
+		dlaset_("G", n, &c__1, &c_b72, &c_b72, &work[1], n)
+			;
+		dlaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu);
+	    }
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		iwork[p] = p;
+/* L5001: */
+	    }
+	    if (rowprm) {
+		i__1 = *n + *m - 1;
+		for (p = *n + 1; p <= i__1; ++p) {
+		    iwork[p] = p - *n;
+/* L5002: */
+		}
+	    }
+	    if (conda) {
+		rwork[1] = -1.;
+	    }
+	    rwork[2] = -1.;
+	    return 0;
+	}
+
+	if (rwork[1] > big / sqrt((doublereal) (*m))) {
+/*               Scale the matrix by 1/sqrt(M) if a too-large entry is detected. */
+	    d__1 = sqrt((doublereal) (*m));
+	    dlascl_("G", &c__0, &c__0, &d__1, &c_b76, m, n, &a[a_offset], lda,
+		     &ierr);
+	    ascaled = TRUE_;
+	}
+	i__1 = *m - 1;
+	dlaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[*n + 1], &c__1);
+    }
+
+/*    Scaling at this stage only guards against norm overflow during the QR factorization. The SVD procedure should */
+/*    have its own scaling to save the singular values from overflows and */
+/*    underflows. That depends on the SVD procedure. */
+
+    if (! rowprm) {
+	rtmp = dlange_("M", m, n, &a[a_offset], lda, rdummy);
+	if (rtmp != rtmp || rtmp * 0. != 0.) {
+	    *info = -8;
+	    i__1 = -(*info);
+	    xerbla_("DGESVDQ", &i__1, (ftnlen)7);
+	    return 0;
+	}
+	if (rtmp > big / sqrt((doublereal) (*m))) {
+/*             Scale the matrix by 1/sqrt(M) if a too-large entry is detected. */
+	    d__1 = sqrt((doublereal) (*m));
+	    dlascl_("G", &c__0, &c__0, &d__1, &c_b76, m, n, &a[a_offset], lda,
+		     &ierr);
+	    ascaled = TRUE_;
+	}
+    }
+
+
+/*     A * P = Q * [ R ] */
+/*                 [ 0 ] */
+
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	iwork[p] = 0;
+/* L1963: */
+    }
+    i__1 = *lwork - *n;
+    dgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], &
+	    i__1, &ierr);
+
+/*    If the user requested accuracy level allows truncation in the */
+/*    computed upper triangular factor, the matrix R is examined and, */
+/*    if possible, replaced with its leading upper trapezoidal part. */
+
+    epsln = dlamch_("E");
+    sfmin = dlamch_("S");
+/*     SMALL = SFMIN / EPSLN */
+    nr = *n;
+
+    if (accla) {
+
+/*        Standard absolute error bound suffices. All sigma_i with */
+/*        sigma_i < N*EPS*||A||_F are flushed to zero. This is an */
+/*        aggressive enforcement of lower numerical rank by introducing a */
+/*        backward error of the order of N*EPS*||A||_F. */
+	nr = 1;
+	rtmp = sqrt((doublereal) (*n)) * epsln;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((d__2 = a[p + p * a_dim1], abs(d__2)) < rtmp * (d__1 = a[
+		    a_dim1 + 1], abs(d__1))) {
+		goto L3002;
+	    }
+	    ++nr;
+/* L3001: */
+	}
+L3002:
+
+	;
+    } else if (acclm) {
+/*        Sudden drop on the diagonal of R is used as the criterion for being */
+/*        close-to-rank-deficient. The threshold is set to EPSLN=DLAMCH('E'). */
+/*        [[This can be made more flexible by replacing this hard-coded value */
+/*        with a user specified threshold.]] Also, the values that underflow */
+/*        will be truncated. */
+	nr = 1;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((d__2 = a[p + p * a_dim1], abs(d__2)) < epsln * (d__1 = a[p - 
+		    1 + (p - 1) * a_dim1], abs(d__1)) || (d__3 = a[p + p * 
+		    a_dim1], abs(d__3)) < sfmin) {
+		goto L3402;
+	    }
+	    ++nr;
+/* L3401: */
+	}
+L3402:
+
+	;
+    } else {
+/*        obvious case of zero pivots. */
+/*        R(i,i)=0 => R(i:N,i:N)=0. */
+	nr = 1;
+	i__1 = *n;
+	for (p = 2; p <= i__1; ++p) {
+	    if ((d__1 = a[p + p * a_dim1], abs(d__1)) == 0.) {
+		goto L3502;
+	    }
+	    ++nr;
+/* L3501: */
+	}
+L3502:
+
+	if (conda) {
+/*           Estimate the scaled condition number of A. Use the fact that it is */
+/*           the same as the scaled condition number of R. */
+	    dlacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv);
+/*              Only the leading NR x NR submatrix of the triangular factor */
+/*              is considered. Only if NR=N will this give a reliable error */
+/*              bound. However, even for NR < N, this can be used on an */
+/*              expert level and obtain useful information in the sense of */
+/*              perturbation theory. */
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		rtmp = dnrm2_(&p, &v[p * v_dim1 + 1], &c__1);
+		d__1 = 1. / rtmp;
+		dscal_(&p, &d__1, &v[p * v_dim1 + 1], &c__1);
+/* L3053: */
+	    }
+	    if (! (lsvec || rsvec)) {
+		dpocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[1], 
+			&iwork[*n + iwoff], &ierr);
+	    } else {
+		dpocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[*n 
+			+ 1], &iwork[*n + iwoff], &ierr);
+	    }
+	    sconda = 1. / sqrt(rtmp);
+/*           For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1), */
+/*           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */
+/*           See the reference [1] for more details. */
+	}
+
+    }
+
+    if (wntur) {
+	n1 = nr;
+    } else if (wntus || wntuf) {
+	n1 = *n;
+    } else if (wntua) {
+	n1 = *m;
+    }
+
+    if (! (rsvec || lsvec)) {
+/* ....................................................................... */
+/* ....................................................................... */
+	if (rtrans) {
+
+/*           Copy R**T into the lower triangle of [A] and set the upper triangle of [A] to zero. */
+	    i__1 = f2cmin(*n,nr);
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p + 1; q <= i__2; ++q) {
+		    a[q + p * a_dim1] = a[p + q * a_dim1];
+		    if (q <= nr) {
+			a[p + q * a_dim1] = 0.;
+		    }
+/* L1147: */
+		}
+/* L1146: */
+	    }
+
+	    dgesvd_("N", "N", n, &nr, &a[a_offset], lda, &s[1], &u[u_offset], 
+		    ldu, &v[v_offset], ldv, &work[1], lwork, info);
+
+	} else {
+
+
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &a[a_dim1 + 2], 
+			lda);
+	    }
+	    dgesvd_("N", "N", &nr, n, &a[a_offset], lda, &s[1], &u[u_offset], 
+		    ldu, &v[v_offset], ldv, &work[1], lwork, info);
+
+	}
+
+    } else if (lsvec && ! rsvec) {
+/* ....................................................................... */
+/* ....................................................................... */
+	if (rtrans) {
+/*            vectors of R */
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p; q <= i__2; ++q) {
+		    u[q + p * u_dim1] = a[p + q * a_dim1];
+/* L1193: */
+		}
+/* L1192: */
+	    }
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(u_dim1 << 1) + 
+			1], ldu);
+	    }
+/*           vectors overwrite [U](1:NR,1:NR) as transposed. These */
+/*           will be pre-multiplied by Q to build the left singular vectors of A. */
+	    i__1 = *lwork - *n;
+	    dgesvd_("N", "O", n, &nr, &u[u_offset], ldu, &s[1], &u[u_offset], 
+		    ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info);
+
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = nr;
+		for (q = p + 1; q <= i__2; ++q) {
+		    rtmp = u[q + p * u_dim1];
+		    u[q + p * u_dim1] = u[p + q * u_dim1];
+		    u[p + q * u_dim1] = rtmp;
+/* L1120: */
+		}
+/* L1119: */
+	    }
+
+	} else {
+	    dlacpy_("U", &nr, n, &a[a_offset], lda, &u[u_offset], ldu);
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[u_dim1 + 2], 
+			ldu);
+	    }
+/*            vectors overwrite [U](1:NR,1:NR) */
+	    i__1 = *lwork - *n;
+	    dgesvd_("O", "N", &nr, n, &u[u_offset], ldu, &s[1], &u[u_offset], 
+		    ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info);
+/*               R. These will be pre-multiplied by Q to build the left singular */
+/*               vectors of A. */
+	}
+
+/*              (M x NR) or (M x N) or (M x M). */
+	if (nr < *m && ! wntuf) {
+	    i__1 = *m - nr;
+	    dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + u_dim1], ldu);
+	    if (nr < n1) {
+		i__1 = n1 - nr;
+		dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * u_dim1 
+			+ 1], ldu);
+		i__1 = *m - nr;
+		i__2 = n1 - nr;
+		dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + (nr + 
+			1) * u_dim1], ldu);
+	    }
+	}
+
+/*           The Q matrix from the first QRF is built into the left singular */
+/*           vectors matrix U. */
+
+	if (! wntuf) {
+	    i__1 = *lwork - *n;
+	    dormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[
+		    u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+	}
+	if (rowprm && ! wntuf) {
+	    i__1 = *m - 1;
+	    dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], &
+		    c_n1);
+	}
+
+    } else if (rsvec && ! lsvec) {
+/* ....................................................................... */
+/* ....................................................................... */
+	if (rtrans) {
+	    i__1 = nr;
+	    for (p = 1; p <= i__1; ++p) {
+		i__2 = *n;
+		for (q = p; q <= i__2; ++q) {
+		    v[q + p * v_dim1] = a[p + q * a_dim1];
+/* L1166: */
+		}
+/* L1165: */
+	    }
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 1) + 
+			1], ldv);
+	    }
+/*           vectors not computed */
+	    if (wntvr || nr == *n) {
+		i__1 = *lwork - *n;
+		dgesvd_("O", "N", n, &nr, &v[v_offset], ldv, &s[1], &u[
+			u_offset], ldu, &u[u_offset], ldu, &work[*n + 1], &
+			i__1, info);
+
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = v[q + p * v_dim1];
+			v[q + p * v_dim1] = v[p + q * v_dim1];
+			v[p + q * v_dim1] = rtmp;
+/* L1122: */
+		    }
+/* L1121: */
+		}
+
+		if (nr < *n) {
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = nr + 1; q <= i__2; ++q) {
+			    v[p + q * v_dim1] = v[q + p * v_dim1];
+/* L1104: */
+			}
+/* L1103: */
+		    }
+		}
+		dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+	    } else {
+/*               [!] This is simple implementation that augments [V](1:N,1:NR) */
+/*               by padding a zero block. In the case NR << N, a more efficient */
+/*               way is to first use the QR factorization. For more details */
+/*               how to implement this, see the " FULL SVD " branch. */
+		i__1 = *n - nr;
+		dlaset_("G", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * v_dim1 + 
+			1], ldv);
+		i__1 = *lwork - *n;
+		dgesvd_("O", "N", n, n, &v[v_offset], ldv, &s[1], &u[u_offset]
+			, ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info);
+
+		i__1 = *n;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = *n;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = v[q + p * v_dim1];
+			v[q + p * v_dim1] = v[p + q * v_dim1];
+			v[p + q * v_dim1] = rtmp;
+/* L1124: */
+		    }
+/* L1123: */
+		}
+		dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+	    }
+
+	} else {
+	    dlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv);
+	    if (nr > 1) {
+		i__1 = nr - 1;
+		i__2 = nr - 1;
+		dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2], 
+			ldv);
+	    }
+/*            vectors stored in U(1:NR,1:NR) */
+	    if (wntvr || nr == *n) {
+		i__1 = *lwork - *n;
+		dgesvd_("N", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[
+			u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], &
+			i__1, info);
+		dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+	    } else {
+/*               [!] This is simple implementation that augments [V](1:NR,1:N) */
+/*               by padding a zero block. In the case NR << N, a more efficient */
+/*               way is to first use the LQ factorization. For more details */
+/*               how to implement this, see the " FULL SVD " branch. */
+		i__1 = *n - nr;
+		dlaset_("G", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1], 
+			ldv);
+		i__1 = *lwork - *n;
+		dgesvd_("N", "O", n, n, &v[v_offset], ldv, &s[1], &u[u_offset]
+			, ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info);
+		dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+	    }
+/*            vectors of A. */
+	}
+
+    } else {
+/* ....................................................................... */
+/* ....................................................................... */
+	if (rtrans) {
+
+
+	    if (wntvr || nr == *n) {
+/*            vectors of R**T */
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = *n;
+		    for (q = p; q <= i__2; ++q) {
+			v[q + p * v_dim1] = a[p + q * a_dim1];
+/* L1169: */
+		    }
+/* L1168: */
+		}
+		if (nr > 1) {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		}
+
+/*           singular vectors of R**T stored in [U](1:NR,1:NR) as transposed */
+		i__1 = *lwork - *n;
+		dgesvd_("O", "A", n, &nr, &v[v_offset], ldv, &s[1], &v[
+			v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], &
+			i__1, info);
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = v[q + p * v_dim1];
+			v[q + p * v_dim1] = v[p + q * v_dim1];
+			v[p + q * v_dim1] = rtmp;
+/* L1116: */
+		    }
+/* L1115: */
+		}
+		if (nr < *n) {
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = nr + 1; q <= i__2; ++q) {
+			    v[p + q * v_dim1] = v[q + p * v_dim1];
+/* L1102: */
+			}
+/* L1101: */
+		    }
+		}
+		dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+
+		i__1 = nr;
+		for (p = 1; p <= i__1; ++p) {
+		    i__2 = nr;
+		    for (q = p + 1; q <= i__2; ++q) {
+			rtmp = u[q + p * u_dim1];
+			u[q + p * u_dim1] = u[p + q * u_dim1];
+			u[p + q * u_dim1] = rtmp;
+/* L1118: */
+		    }
+/* L1117: */
+		}
+
+		if (nr < *m && ! wntuf) {
+		    i__1 = *m - nr;
+		    dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr < n1) {
+			i__1 = n1 - nr;
+			dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) *
+				 u_dim1 + 1], ldu);
+			i__1 = *m - nr;
+			i__2 = n1 - nr;
+			dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 
+				+ (nr + 1) * u_dim1], ldu);
+		    }
+		}
+
+	    } else {
+/*            vectors of R**T */
+/*               [[The optimal ratio N/NR for using QRF instead of padding */
+/*                 with zeros. Here hard coded to 2; it must be at least */
+/*                 two due to work space constraints.]] */
+/*               OPTRATIO = ILAENV(6, 'DGESVD', 'S' // 'O', NR,N,0,0) */
+/*               OPTRATIO = MAX( OPTRATIO, 2 ) */
+		optratio = 2;
+		if (optratio * nr > *n) {
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p; q <= i__2; ++q) {
+			    v[q + p * v_dim1] = a[p + q * a_dim1];
+/* L1199: */
+			}
+/* L1198: */
+		    }
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 
+				<< 1) + 1], ldv);
+		    }
+
+		    i__1 = *n - nr;
+		    dlaset_("A", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * 
+			    v_dim1 + 1], ldv);
+		    i__1 = *lwork - *n;
+		    dgesvd_("O", "A", n, n, &v[v_offset], ldv, &s[1], &v[
+			    v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], 
+			    &i__1, info);
+
+		    i__1 = *n;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p + 1; q <= i__2; ++q) {
+			    rtmp = v[q + p * v_dim1];
+			    v[q + p * v_dim1] = v[p + q * v_dim1];
+			    v[p + q * v_dim1] = rtmp;
+/* L1114: */
+			}
+/* L1113: */
+		    }
+		    dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*              (M x N1), i.e. (M x N) or (M x M). */
+
+		    i__1 = *n;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p + 1; q <= i__2; ++q) {
+			    rtmp = u[q + p * u_dim1];
+			    u[q + p * u_dim1] = u[p + q * u_dim1];
+			    u[p + q * u_dim1] = rtmp;
+/* L1112: */
+			}
+/* L1111: */
+		    }
+
+		    if (*n < *m && ! wntuf) {
+			i__1 = *m - *n;
+			dlaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + 
+				u_dim1], ldu);
+			if (*n < n1) {
+			    i__1 = n1 - *n;
+			    dlaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1)
+				     * u_dim1 + 1], ldu);
+			    i__1 = *m - *n;
+			    i__2 = n1 - *n;
+			    dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n 
+				    + 1 + (*n + 1) * u_dim1], ldu);
+			}
+		    }
+		} else {
+/*                  singular vectors of R */
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = p; q <= i__2; ++q) {
+			    u[q + (nr + p) * u_dim1] = a[p + q * a_dim1];
+/* L1197: */
+			}
+/* L1196: */
+		    }
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(nr + 2)
+				 * u_dim1 + 1], ldu);
+		    }
+		    i__1 = *lwork - *n - nr;
+		    dgeqrf_(n, &nr, &u[(nr + 1) * u_dim1 + 1], ldu, &work[*n 
+			    + 1], &work[*n + nr + 1], &i__1, &ierr);
+		    i__1 = nr;
+		    for (p = 1; p <= i__1; ++p) {
+			i__2 = *n;
+			for (q = 1; q <= i__2; ++q) {
+			    v[q + p * v_dim1] = u[p + (nr + q) * u_dim1];
+/* L1144: */
+			}
+/* L1143: */
+		    }
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 
+			    1) + 1], ldv);
+		    i__1 = *lwork - *n - nr;
+		    dgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, &work[*n + nr 
+			    + 1], &i__1, info);
+		    i__1 = *n - nr;
+		    dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + 
+			    v_dim1], ldv);
+		    i__1 = *n - nr;
+		    dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * 
+			    v_dim1 + 1], ldv);
+		    i__1 = *n - nr;
+		    i__2 = *n - nr;
+		    dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + (
+			    nr + 1) * v_dim1], ldv);
+		    i__1 = *lwork - *n - nr;
+		    dormqr_("R", "C", n, n, &nr, &u[(nr + 1) * u_dim1 + 1], 
+			    ldu, &work[*n + 1], &v[v_offset], ldv, &work[*n + 
+			    nr + 1], &i__1, &ierr);
+		    dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*                 (M x NR) or (M x N) or (M x M). */
+		    if (nr < *m && ! wntuf) {
+			i__1 = *m - nr;
+			dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+				u_dim1], ldu);
+			if (nr < n1) {
+			    i__1 = n1 - nr;
+			    dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 
+				    1) * u_dim1 + 1], ldu);
+			    i__1 = *m - nr;
+			    i__2 = n1 - nr;
+			    dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr 
+				    + 1 + (nr + 1) * u_dim1], ldu);
+			}
+		    }
+		}
+	    }
+
+	} else {
+
+
+	    if (wntvr || nr == *n) {
+		dlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv);
+		if (nr > 1) {
+		    i__1 = nr - 1;
+		    i__2 = nr - 1;
+		    dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2],
+			     ldv);
+		}
+/*               singular vectors of R stored in [U](1:NR,1:NR) */
+		i__1 = *lwork - *n;
+		dgesvd_("S", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[
+			u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], &
+			i__1, info);
+		dlapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]);
+/*              (M x NR) or (M x N) or (M x M). */
+		if (nr < *m && ! wntuf) {
+		    i__1 = *m - nr;
+		    dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr < n1) {
+			i__1 = n1 - nr;
+			dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) *
+				 u_dim1 + 1], ldu);
+			i__1 = *m - nr;
+			i__2 = n1 - nr;
+			dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 
+				+ (nr + 1) * u_dim1], ldu);
+		    }
+		}
+
+	    } else {
+/*               is then N1 (N or M) */
+/*               [[The optimal ratio N/NR for using LQ instead of padding */
+/*                 with zeros. Here hard coded to 2; it must be at least */
+/*                 two due to work space constraints.]] */
+/*               OPTRATIO = ILAENV(6, 'DGESVD', 'S' // 'O', NR,N,0,0) */
+/*               OPTRATIO = MAX( OPTRATIO, 2 ) */
+		optratio = 2;
+		if (optratio * nr > *n) {
+		    dlacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv);
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 
+				+ 2], ldv);
+		    }
+/*                 singular vectors of R stored in [U](1:NR,1:NR) */
+		    i__1 = *n - nr;
+		    dlaset_("A", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1]
+			    , ldv);
+		    i__1 = *lwork - *n;
+		    dgesvd_("S", "O", n, n, &v[v_offset], ldv, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], 
+			    &i__1, info);
+		    dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*                 singular vectors of A. The leading N left singular vectors */
+/*                 are in [U](1:N,1:N) */
+/*                 (M x N1), i.e. (M x N) or (M x M). */
+		    if (*n < *m && ! wntuf) {
+			i__1 = *m - *n;
+			dlaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + 
+				u_dim1], ldu);
+			if (*n < n1) {
+			    i__1 = n1 - *n;
+			    dlaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1)
+				     * u_dim1 + 1], ldu);
+			    i__1 = *m - *n;
+			    i__2 = n1 - *n;
+			    dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n 
+				    + 1 + (*n + 1) * u_dim1], ldu);
+			}
+		    }
+		} else {
+		    dlacpy_("U", &nr, n, &a[a_offset], lda, &u[nr + 1 + 
+			    u_dim1], ldu);
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			dlaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[nr + 2 
+				+ u_dim1], ldu);
+		    }
+		    i__1 = *lwork - *n - nr;
+		    dgelqf_(&nr, n, &u[nr + 1 + u_dim1], ldu, &work[*n + 1], &
+			    work[*n + nr + 1], &i__1, &ierr);
+		    dlacpy_("L", &nr, &nr, &u[nr + 1 + u_dim1], ldu, &v[
+			    v_offset], ldv);
+		    if (nr > 1) {
+			i__1 = nr - 1;
+			i__2 = nr - 1;
+			dlaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 
+				<< 1) + 1], ldv);
+		    }
+		    i__1 = *lwork - *n - nr;
+		    dgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[
+			    u_offset], ldu, &v[v_offset], ldv, &work[*n + nr 
+			    + 1], &i__1, info);
+		    i__1 = *n - nr;
+		    dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + 
+			    v_dim1], ldv);
+		    i__1 = *n - nr;
+		    dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * 
+			    v_dim1 + 1], ldv);
+		    i__1 = *n - nr;
+		    i__2 = *n - nr;
+		    dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + (
+			    nr + 1) * v_dim1], ldv);
+		    i__1 = *lwork - *n - nr;
+		    dormlq_("R", "N", n, n, &nr, &u[nr + 1 + u_dim1], ldu, &
+			    work[*n + 1], &v[v_offset], ldv, &work[*n + nr + 
+			    1], &i__1, &ierr);
+		    dlapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]);
+/*              (M x NR) or (M x N) or (M x M). */
+		    if (nr < *m && ! wntuf) {
+			i__1 = *m - nr;
+			dlaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + 
+				u_dim1], ldu);
+			if (nr < n1) {
+			    i__1 = n1 - nr;
+			    dlaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 
+				    1) * u_dim1 + 1], ldu);
+			    i__1 = *m - nr;
+			    i__2 = n1 - nr;
+			    dlaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr 
+				    + 1 + (nr + 1) * u_dim1], ldu);
+			}
+		    }
+		}
+	    }
+	}
+
+/*           The Q matrix from the first QRF is built into the left singular */
+/*           vectors matrix U. */
+
+	if (! wntuf) {
+	    i__1 = *lwork - *n;
+	    dormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[
+		    u_offset], ldu, &work[*n + 1], &i__1, &ierr);
+	}
+	if (rowprm && ! wntuf) {
+	    i__1 = *m - 1;
+	    dlaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], &
+		    c_n1);
+	}
+
+/*     ... end of the "full SVD" branch */
+    }
+
+/*     Check whether some singular values are returned as zeros, e.g. */
+/*     due to underflow, and update the numerical rank. */
+    p = nr;
+    for (q = p; q >= 1; --q) {
+	if (s[q] > 0.) {
+	    goto L4002;
+	}
+	--nr;
+/* L4001: */
+    }
+L4002:
+
+/*     If the numerical rank dropped (NR < N), the trailing singular values are set to zero. */
+    if (nr < *n) {
+	i__1 = *n - nr;
+	dlaset_("G", &i__1, &c__1, &c_b72, &c_b72, &s[nr + 1], n);
+    }
+/*     Undo the 1/sqrt(M) scaling of A by rescaling the computed singular values. */
+    if (ascaled) {
+	d__1 = sqrt((doublereal) (*m));
+	dlascl_("G", &c__0, &c__0, &c_b76, &d__1, &nr, &c__1, &s[1], n, &ierr);
+    }
+    if (conda) {
+	rwork[1] = sconda;
+    }
+    rwork[2] = (doublereal) (p - nr);
+/*     RWORK(2) = p - NR counts the singular values returned as exact zeros by */
+/*     DGESVD() applied to the (possibly truncated) full row rank triangular (trapezoidal) factor of A. */
+    *numrank = nr;
+
+    return 0;
+
+/*     End of DGESVDQ */
+
+} /* dgesvdq_ */
+
diff --git a/lapack-netlib/SRC/dgesvdx.c b/lapack-netlib/SRC/dgesvdx.c
new file mode 100644
index 000000000..b379aa59f
--- /dev/null
+++ b/lapack-netlib/SRC/dgesvdx.c
@@ -0,0 +1,1342 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* integer type used throughout the translated routines */
+typedef unsigned int uinteger; /* unsigned counterpart, used by the bit_* macros */
+typedef char *address;
+typedef short int shortint;
+typedef float real; /* single-precision scalar */
+typedef double doublereal; /* double-precision scalar */
+typedef struct { real r, i; } complex; /* single-precision complex: r = real part, i = imaginary part */
+typedef struct { doublereal r, i; } doublecomplex; /* double-precision complex: r = real part, i = imaginary part */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* views the struct storage as a C99 complex (pointer cast) */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* views the struct storage as a C99 complex (pointer cast) */
+#define pCf(z) (*_pCf(z)) /* lvalue over *z, used as the assignment target by the c_* macros */
+#define pCd(z) (*_pCd(z)) /* lvalue over *z, used as the assignment target by the z_* / d_* macros */
+typedef int logical; /* truth value; see TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: descriptor for external read/write; fields are presumably error flag, unit, end flag, format and record (inferred from names -- TODO confirm) */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: descriptor for internal read/write; unlike cilist the "unit" member is a char pointer (iciunit) */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: descriptor for "open"; ofnm/ofnmlen pair a string pointer with an explicit length */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: descriptor for "close" (error flag, unit and a status string, going by the member names) */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: descriptor shared by rewind, backspace and endfile (only an error flag and a unit) */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: descriptor for "inquire"; most result strings come as a (char *, length) pair */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/* the members from here on follow the parameter order of the standard */
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): ftnint here, ftnlen for every other length; both are typedefs of int */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar/complex type of this header */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j;  -- left disabled: this header declares no longint type */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable (Namelist.vars points to these) */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of variables; presumably nvars entries behind vars -- TODO confirm */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* function-like macro: evaluates x twice and hides the abs() declared by <stdlib.h> */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* evaluates the selected argument twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* evaluates the selected argument twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): calls double-precision ccos(), unlike c_exp/c_log/c_sin which use the f-suffixed functions */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+// c_sqrt stores its result through pCf(R), i.e. into the storage of the complex struct that R points to.
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* goes through double-precision conj(); the value is narrowed again on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) /* positive difference: a - b if a > b, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* result has type double because cimag() is the double-precision function */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncation toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncation toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* the constant is log10(e) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* round half away from zero; takes a value, not a pointer */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| carrying the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x)) /* expands to the abs() macro defined above */
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n) /* the string argument is ignored; the length is passed explicitly */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): *(B) goes to cpow() unconverted; this cannot compile if B is a doublecomplex* like A -- confirm the intended type (Cd(B)?) */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters; any longer tail is ignored */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars and stops at a NUL in B; the rest of A is left untouched */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* version tag string; not referenced by the other definitions in this header */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break; /* statement macro: leaves the enclosing loop */
+#define mycycle() continue; /* statement macro: starts the next iteration of the enclosing loop */
+#define myceiling(w) (ceil(w)) /* plain expression; a braced {ceil(w)} is only valid as an initializer */
+#define myhuge(w) (HUGE_VAL) /* plain expression; the argument w is ignored */
+/* mymaxloc: forwards to dmaxloc_() with s and e dereferenced; yields the 1-based offset (relative to *s) of the largest element. */
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++: L_fp is a pointer to a function returning logical */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...); /* C++ spelling of an unspecified argument list */
+#else
+typedef logical (*L_fp)(); /* C spelling: empty parentheses leave the arguments unspecified */
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* x**n for integer n by repeated squaring; a negative n yields (1/x)**|n|. */
+	float result = 1.0f;
+	/* |n| is formed in unsigned arithmetic so that n == INT_MIN cannot overflow. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u >> 1) x *= x; /* square only while higher bits remain */
+	}
+	return result;
+}
+static double dpow_ui(double x, integer n) {
+	/* x**n for integer n by repeated squaring; a negative n yields (1/x)**|n|. */
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic so that n == INT_MIN cannot overflow. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u >> 1) x *= x; /* square only while higher bits remain */
+	}
+	return result;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Complex x**n for integer n by repeated squaring; a negative n yields (1/x)**|n|. */
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic so that n == INT_MIN cannot overflow. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 1UL) result *= x;
+		if (u >> 1) x *= x; /* square only while higher bits remain */
+	}
+	return result;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* x**n by repeated squaring; n < 0 yields (1/x)**|n| */
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic so the most negative n cannot overflow. */
+	unsigned long int bits = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x; /* no squaring once the last bit is consumed */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) { /* integer x**n; negative powers truncate toward zero */
+	integer result = 1;
+	/* |n| is formed in unsigned arithmetic so the most negative n cannot overflow. */
+	unsigned long int bits = (n < 0) ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n == 0 || x == 1) return 1;
+	if (n < 0 && x != -1) {
+		/* |x| > 1 gives 0; x == 0 keeps the original's integer division by zero. */
+		return x == 0 ? 1/x : 0;
+	}
+	/* Here n > 0, or x == -1 with n < 0, where only the parity of |n| matters. */
+	while (bits != 0) {
+		if (bits & 1UL) result *= x;
+		bits >>= 1;
+		if (bits != 0) x *= x; /* no squaring once the last bit is consumed */
+	}
+	return result;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used */
+	double best = w[s-1]; integer k, at = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > best) { at = k; best = w[k-1]; }
+	return at - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based position, counted from s, of the first largest entry among w(s..e); n is not used */
+	float best = w[s-1]; integer k, at = s;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > best) { at = k; best = w[k-1]; }
+	return at - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of conj(x(i)) * y(i) for n strided single-precision complex elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment starts at the far end of the */
+	/* vector, so every access stays within the n strided elements.                */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	/* Store through the complex view of the result struct. */
+	pCf(z) = acc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of conj(x(i)) * y(i) for n strided double-precision complex elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment starts at the far end of the */
+	/* vector, so every access stays within the n strided elements.                */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	/* Store through the complex view of the result struct. */
+	pCd(z) = acc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i), no conjugation, for n strided single-precision complex elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment starts at the far end of the */
+	/* vector, so every access stays within the n strided elements.                */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	/* Store through the complex view of the result struct. */
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* *z = sum over i of x(i) * y(i), no conjugation, for n strided double-precision complex elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment starts at the far end of the */
+	/* vector, so every access stays within the n strided elements.                */
+	integer ix = (incx < 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++, ix += incx, iy += incy) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	/* Store through the complex view of the result struct. */
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values: named objects so their addresses can be passed to by-reference arguments */
+
+static integer c__6 = 6; /* ISPEC argument of the ilaenv_ call that sets mnthr */
+static integer c__0 = 0;
+static integer c__2 = 2; /* number of pieces joined by s_cat */
+static integer c__1 = 1; /* unit stride for dcopy_, ISPEC for block-size queries */
+static integer c_n1 = -1;
+static doublereal c_b109 = 0.; /* fill value handed to dlaset_ */
+
+/* > \brief <b> DGESVDX computes the singular value decomposition (SVD) for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESVDX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvdx.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvdx.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvdx.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*     SUBROUTINE DGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, */
+/*    $                    IL, IU, NS, S, U, LDU, VT, LDVT, WORK, */
+/*    $                    LWORK, IWORK, INFO ) */
+
+
+/*      CHARACTER          JOBU, JOBVT, RANGE */
+/*      INTEGER            IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS */
+/*      DOUBLE PRECISION   VL, VU */
+/*     INTEGER            IWORK( * ) */
+/*     DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ), */
+/*    $                   VT( LDVT, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  DGESVDX computes the singular value decomposition (SVD) of a real */
+/* >  M-by-N matrix A, optionally computing the left and/or right singular */
+/* >  vectors. The SVD is written */
+/* > */
+/* >      A = U * SIGMA * transpose(V) */
+/* > */
+/* >  where SIGMA is an M-by-N matrix which is zero except for its */
+/* >  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
+/* >  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA */
+/* >  are the singular values of A; they are real and non-negative, and */
+/* >  are returned in descending order.  The first min(m,n) columns of */
+/* >  U and V are the left and right singular vectors of A. */
+/* > */
+/* >  DGESVDX uses an eigenvalue problem for obtaining the SVD, which */
+/* >  allows for the computation of a subset of singular values and */
+/* >  vectors. See DBDSVDX for details. */
+/* > */
+/* >  Note that the routine returns V**T, not V. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          Specifies options for computing all or part of the matrix U: */
+/* >          = 'V':  the first min(m,n) columns of U (the left singular */
+/* >                  vectors) or as specified by RANGE are returned in */
+/* >                  the array U; */
+/* >          = 'N':  no columns of U (no left singular vectors) are */
+/* >                  computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBVT */
+/* > \verbatim */
+/* >          JOBVT is CHARACTER*1 */
+/* >           Specifies options for computing all or part of the matrix */
+/* >           V**T: */
+/* >           = 'V':  the first min(m,n) rows of V**T (the right singular */
+/* >                   vectors) or as specified by RANGE are returned in */
+/* >                   the array VT; */
+/* >           = 'N':  no rows of V**T (no right singular vectors) are */
+/* >                   computed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] RANGE */
+/* > \verbatim */
+/* >          RANGE is CHARACTER*1 */
+/* >          = 'A': all singular values will be found. */
+/* >          = 'V': all singular values in the half-open interval (VL,VU] */
+/* >                 will be found. */
+/* >          = 'I': the IL-th through IU-th singular values will be found. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, the contents of A are destroyed. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] VL */
+/* > \verbatim */
+/* >          VL is DOUBLE PRECISION */
+/* >          If RANGE='V', the lower bound of the interval to */
+/* >          be searched for singular values. VU > VL. */
+/* >          Not referenced if RANGE = 'A' or 'I'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] VU */
+/* > \verbatim */
+/* >          VU is DOUBLE PRECISION */
+/* >          If RANGE='V', the upper bound of the interval to */
+/* >          be searched for singular values. VU > VL. */
+/* >          Not referenced if RANGE = 'A' or 'I'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IL */
+/* > \verbatim */
+/* >          IL is INTEGER */
+/* >          If RANGE='I', the index of the */
+/* >          smallest singular value to be returned. */
+/* >          1 <= IL <= IU <= min(M,N), if min(M,N) > 0. */
+/* >          Not referenced if RANGE = 'A' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IU */
+/* > \verbatim */
+/* >          IU is INTEGER */
+/* >          If RANGE='I', the index of the */
+/* >          largest singular value to be returned. */
+/* >          1 <= IL <= IU <= min(M,N), if min(M,N) > 0. */
+/* >          Not referenced if RANGE = 'A' or 'V'. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] NS */
+/* > \verbatim */
+/* >          NS is INTEGER */
+/* >          The total number of singular values found, */
+/* >          0 <= NS <= min(M,N). */
+/* >          If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] S */
+/* > \verbatim */
+/* >          S is DOUBLE PRECISION array, dimension (min(M,N)) */
+/* >          The singular values of A, sorted so that S(i) >= S(i+1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] U */
+/* > \verbatim */
+/* >          U is DOUBLE PRECISION array, dimension (LDU,UCOL) */
+/* >          If JOBU = 'V', U contains columns of U (the left singular */
+/* >          vectors, stored columnwise) as specified by RANGE; if */
+/* >          JOBU = 'N', U is not referenced. */
+/* >          Note: The user must ensure that UCOL >= NS; if RANGE = 'V', */
+/* >          the exact value of NS is not known in advance and an upper */
+/* >          bound must be used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDU */
+/* > \verbatim */
+/* >          LDU is INTEGER */
+/* >          The leading dimension of the array U.  LDU >= 1; if */
+/* >          JOBU = 'V', LDU >= M. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] VT */
+/* > \verbatim */
+/* >          VT is DOUBLE PRECISION array, dimension (LDVT,N) */
+/* >          If JOBVT = 'V', VT contains the rows of V**T (the right singular */
+/* >          vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', */
+/* >          VT is not referenced. */
+/* >          Note: The user must ensure that LDVT >= NS; if RANGE = 'V', */
+/* >          the exact value of NS is not known in advance and an upper */
+/* >          bound must be used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDVT */
+/* > \verbatim */
+/* >          LDVT is INTEGER */
+/* >          The leading dimension of the array VT.  LDVT >= 1; if */
+/* >          JOBVT = 'V', LDVT >= NS (see above). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          LWORK >= MAX(1,MIN(M,N)*(3*MIN(M,N)+20)) for the paths (see */
+/* >          comments inside the code): */
+/* >             - PATH 1  (M much larger than N) */
+/* >             - PATH 1t (N much larger than M) */
+/* >          LWORK >= MAX(1,MIN(M,N)*(2*MIN(M,N)+19),4*MIN(M,N)+MAX(M,N)) for the other paths. */
+/* >          For good performance, LWORK should generally be larger. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (12*MIN(M,N)) */
+/* >          If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, */
+/* >          then IWORK contains the indices of the eigenvectors that failed */
+/* >          to converge in DBDSVDX/DSTEVX. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >     INFO is INTEGER */
+/* >           = 0:  successful exit */
+/* >           < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >           > 0:  if INFO = i, then i eigenvectors failed to converge */
+/* >                 in DBDSVDX/DSTEVX. */
+/* >                 if INFO = N*2 + 1, an internal error occurred in */
+/* >                 DBDSVDX */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleGEsing */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgesvdx_(char *jobu, char *jobvt, char *range, integer *
+	m, integer *n, doublereal *a, integer *lda, doublereal *vl, 
+	doublereal *vu, integer *il, integer *iu, integer *ns, doublereal *s, 
+	doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, 
+	doublereal *work, integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 
+	    i__2, i__3;
+    char ch__1[2];
+
+    /* Local variables */
+    integer iscl;
+    logical alls, inds;
+    integer ilqf;
+    doublereal anrm;
+    integer ierr, iqrf, itau;
+    char jobz[1];
+    logical vals;
+    integer i__, j;
+    extern logical lsame_(char *, char *);
+    integer iltgk, itemp, minmn;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer itaup, itauq, iutgk, itgkz, mnthr;
+    logical wantu;
+    integer id, ie;
+    extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, integer *), 
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    integer *, integer *, doublereal *, integer *, integer *),
+	     dgeqrf_(integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *), dlacpy_(char *,
+	     integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *), dlaset_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    doublereal bignum, abstol;
+    extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    char rngtgk[1];
+    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, integer *), 
+	    dormqr_(char *, char *, integer *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *);
+    integer minwrk, maxwrk;
+    doublereal smlnum;
+    logical lquery, wantvt;
+    doublereal dum[1], eps;
+    extern /* Subroutine */ int dbdsvdx_(char *, char *, char *, integer *, 
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1 * 1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1 * 1;
+    vt -= vt_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *ns = 0;
+    *info = 0;
+    abstol = dlamch_("S") * 2;
+    lquery = *lwork == -1;
+    minmn = f2cmin(*m,*n);
+    wantu = lsame_(jobu, "V");
+    wantvt = lsame_(jobvt, "V");
+    if (wantu || wantvt) {
+	*(unsigned char *)jobz = 'V';
+    } else {
+	*(unsigned char *)jobz = 'N';
+    }
+    alls = lsame_(range, "A");
+    vals = lsame_(range, "V");
+    inds = lsame_(range, "I");
+
+    *info = 0;
+    if (! lsame_(jobu, "V") && ! lsame_(jobu, "N")) {
+	*info = -1;
+    } else if (! lsame_(jobvt, "V") && ! lsame_(jobvt, 
+	    "N")) {
+	*info = -2;
+    } else if (! (alls || vals || inds)) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*m > *lda) {
+	*info = -7;
+    } else if (minmn > 0) {
+	if (vals) {
+	    if (*vl < 0.) {
+		*info = -8;
+	    } else if (*vu <= *vl) {
+		*info = -9;
+	    }
+	} else if (inds) {
+	    if (*il < 1 || *il > f2cmax(1,minmn)) {
+		*info = -10;
+	    } else if (*iu < f2cmin(minmn,*il) || *iu > minmn) {
+		*info = -11;
+	    }
+	}
+	if (*info == 0) {
+	    if (wantu && *ldu < *m) {
+		*info = -15;
+	    } else if (wantvt) {
+		if (inds) {
+		    if (*ldvt < *iu - *il + 1) {
+			*info = -17;
+		    }
+		} else if (*ldvt < minmn) {
+		    *info = -17;
+		}
+	    }
+	}
+    }
+
+/*     Compute workspace */
+/*     (Note: Comments in the code beginning "Workspace:" describe the */
+/*     minimal amount of workspace needed at that point in the code, */
+/*     as well as the preferred amount for good performance. */
+/*     NB refers to the optimal block size for the immediately */
+/*     following subroutine, as returned by ILAENV.) */
+
+    if (*info == 0) {
+	minwrk = 1;
+	maxwrk = 1;
+	if (minmn > 0) {
+	    if (*m >= *n) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = jobu;
+		i__1[1] = 1, a__1[1] = jobvt;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, (
+			ftnlen)6, (ftnlen)2);
+		if (*m >= mnthr) {
+
+/*                 Path 1 (M much larger than N) */
+
+		    maxwrk = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *n * (*n + 5) + (*n << 1) * ilaenv_(
+			    &c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * 
+				ilaenv_(&c__1, "DORMQR", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * 
+				ilaenv_(&c__1, "DORMLQ", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    minwrk = *n * (*n * 3 + 20);
+		} else {
+
+/*                 Path 2 (M at least N, but not much larger) */
+
+		    maxwrk = (*n << 2) + (*m + *n) * ilaenv_(&c__1, "DGEBRD", 
+			    " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * 
+				ilaenv_(&c__1, "DORMQR", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * 
+				ilaenv_(&c__1, "DORMLQ", " ", n, n, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+/* Computing MAX */
+		    i__2 = *n * ((*n << 1) + 19), i__3 = (*n << 2) + *m;
+		    minwrk = f2cmax(i__2,i__3);
+		}
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = jobu;
+		i__1[1] = 1, a__1[1] = jobvt;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		mnthr = ilaenv_(&c__6, "DGESVD", ch__1, m, n, &c__0, &c__0, (
+			ftnlen)6, (ftnlen)2);
+		if (*n >= mnthr) {
+
+/*                 Path 1t (N much larger than M) */
+
+		    maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__2 = maxwrk, i__3 = *m * (*m + 5) + (*m << 1) * ilaenv_(
+			    &c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    maxwrk = f2cmax(i__2,i__3);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * 
+				ilaenv_(&c__1, "DORMQR", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * 
+				ilaenv_(&c__1, "DORMLQ", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    minwrk = *m * (*m * 3 + 20);
+		} else {
+
+/*                 Path 2t (N at least M, but not much larger) */
+
+		    maxwrk = (*m << 2) + (*m + *n) * ilaenv_(&c__1, "DGEBRD", 
+			    " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		    if (wantu) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * 
+				ilaenv_(&c__1, "DORMQR", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+		    if (wantvt) {
+/* Computing MAX */
+			i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * 
+				ilaenv_(&c__1, "DORMLQ", " ", m, m, &c_n1, &
+				c_n1, (ftnlen)6, (ftnlen)1);
+			maxwrk = f2cmax(i__2,i__3);
+		    }
+/* Computing MAX */
+		    i__2 = *m * ((*m << 1) + 19), i__3 = (*m << 2) + *n;
+		    minwrk = f2cmax(i__2,i__3);
+		}
+	    }
+	}
+	maxwrk = f2cmax(maxwrk,minwrk);
+	work[1] = (doublereal) maxwrk;
+
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -19;
+	}
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("DGESVDX", &i__2, (ftnlen)7);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Set singular values indices according to RANGE. */
+
+    if (alls) {
+	*(unsigned char *)rngtgk = 'I';
+	iltgk = 1;
+	iutgk = f2cmin(*m,*n);
+    } else if (inds) {
+	*(unsigned char *)rngtgk = 'I';
+	iltgk = *il;
+	iutgk = *iu;
+    } else {
+	*(unsigned char *)rngtgk = 'V';
+	iltgk = 0;
+	iutgk = 0;
+    }
+
+/*     Get machine constants */
+
+    eps = dlamch_("P");
+    smlnum = sqrt(dlamch_("S")) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+    }
+
+    if (*m >= *n) {
+
+/*        A has at least as many rows as columns. If A has sufficiently */
+/*        more rows than columns, first reduce A using the QR */
+/*        decomposition. */
+
+	if (*m >= mnthr) {
+
+/*           Path 1 (M much larger than N): */
+/*           A = Q * R = Q * ( QB * B * PB**T ) */
+/*                     = Q * ( QB * ( UB * S * VB**T ) * PB**T ) */
+/*           U = Q * QB * UB; V**T = VB**T * PB**T */
+
+/*           Compute A=Q*R */
+/*           (Workspace: need 2*N, prefer N+N*NB) */
+
+	    itau = 1;
+	    itemp = itau + *n;
+	    i__2 = *lwork - itemp + 1;
+	    dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2,
+		     info);
+
+/*           Copy R into WORK and bidiagonalize it: */
+/*           (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB) */
+
+	    iqrf = itemp;
+	    id = iqrf + *n * *n;
+	    ie = id + *n;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    itemp = itaup + *n;
+	    dlacpy_("U", n, n, &a[a_offset], lda, &work[iqrf], n);
+	    i__2 = *n - 1;
+	    i__3 = *n - 1;
+	    dlaset_("L", &i__2, &i__3, &c_b109, &c_b109, &work[iqrf + 1], n);
+	    i__2 = *lwork - itemp + 1;
+	    dgebrd_(n, n, &work[iqrf], n, &work[id], &work[ie], &work[itauq], 
+		    &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 14*N + 2*N*(N+1)) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *n * ((*n << 1) + 1);
+	    i__2 = *n << 1;
+	    dbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors (status goes to IERR so INFO from DBDSVDX is kept). */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *n << 1;
+		}
+		i__2 = *m - *n;
+		dlaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1],
+			 ldu);
+
+/*              Call DORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("Q", "L", "N", n, ns, n, &work[iqrf], n, &work[itauq],
+			 &u[u_offset], ldu, &work[itemp], &i__2, &ierr);
+
+/*              Call DORMQR to compute Q*(QB*UB). */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormqr_("L", "N", m, ns, n, &a[a_offset], lda, &work[itau], &
+			u[u_offset], ldu, &work[itemp], &i__2, &ierr);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *n;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *n << 1;
+		}
+
+/*              Call DORMBR to compute VB**T * PB**T */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("P", "R", "T", ns, n, n, &work[iqrf], n, &work[itaup],
+			 &vt[vt_offset], ldvt, &work[itemp], &i__2, &ierr);
+	    }
+	} else {
+
+/*           Path 2 (M at least N, but not much larger) */
+/*           Reduce A to bidiagonal form without QR decomposition */
+/*           A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */
+/*           U = QB * UB; V**T = VB**T * PB**T */
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB) */
+
+	    id = 1;
+	    ie = id + *n;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    itemp = itaup + *n;
+	    i__2 = *lwork - itemp + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[
+		    itauq], &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 14*N + 2*N*(N+1)) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *n * ((*n << 1) + 1);
+	    i__2 = *n << 1;
+	    dbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors. */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *n << 1;
+		}
+		i__2 = *m - *n;
+		dlaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1],
+			 ldu);
+
+/*              Call DORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[itemp], &i__2, &ierr);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *n;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *n << 1;
+		}
+
+/*              Call DORMBR to compute VB**T * PB**T */
+/*              (Workspace in WORK( ITEMP ): need N, prefer N*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("P", "R", "T", ns, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, &
+			ierr);
+	    }
+	}
+    } else {
+
+/*        A has more columns than rows. If A has sufficiently more */
+/*        columns than rows, first reduce A using the LQ decomposition. */
+
+	if (*n >= mnthr) {
+
+/*           Path 1t (N much larger than M): */
+/*           A = L * Q = ( QB * B * PB**T ) * Q */
+/*                     = ( QB * ( UB * S * VB**T ) * PB**T ) * Q */
+/*           U = QB * UB ; V**T = VB**T * PB**T * Q */
+
+/*           Compute A=L*Q */
+/*           (Workspace: need 2*M, prefer M+M*NB) */
+
+	    itau = 1;
+	    itemp = itau + *m;
+	    i__2 = *lwork - itemp + 1;
+	    dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2,
+		     info);
+/*           Copy L into WORK and bidiagonalize it: */
+/*           (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB) */
+
+	    ilqf = itemp;
+	    id = ilqf + *m * *m;
+	    ie = id + *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    itemp = itaup + *m;
+	    dlacpy_("L", m, m, &a[a_offset], lda, &work[ilqf], m);
+	    i__2 = *m - 1;
+	    i__3 = *m - 1;
+	    dlaset_("U", &i__2, &i__3, &c_b109, &c_b109, &work[ilqf + *m], m);
+	    i__2 = *lwork - itemp + 1;
+	    dgebrd_(m, m, &work[ilqf], m, &work[id], &work[ie], &work[itauq], 
+		    &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 2*M*M+14*M) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *m * ((*m << 1) + 1);
+	    i__2 = *m << 1;
+	    dbdsvdx_("U", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors (status goes to IERR so INFO from DBDSVDX is kept). */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *m << 1;
+		}
+
+/*              Call DORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("Q", "L", "N", m, ns, m, &work[ilqf], m, &work[itauq],
+			 &u[u_offset], ldu, &work[itemp], &i__2, &ierr);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *m;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *m << 1;
+		}
+		i__2 = *n - *m;
+		dlaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * 
+			vt_dim1 + 1], ldvt);
+
+/*              Call DORMBR to compute (VB**T)*(PB**T) */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("P", "R", "T", ns, m, m, &work[ilqf], m, &work[itaup],
+			 &vt[vt_offset], ldvt, &work[itemp], &i__2, &ierr);
+
+/*              Call DORMLQ to compute ((VB**T)*(PB**T))*Q. */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormlq_("R", "N", ns, n, m, &a[a_offset], lda, &work[itau], &
+			vt[vt_offset], ldvt, &work[itemp], &i__2, &ierr);
+	    }
+	} else {
+
+/*           Path 2t (N greater than M, but not much larger) */
+/*           Reduce to bidiagonal form without LQ decomposition */
+/*           A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */
+/*           U = QB * UB; V**T = VB**T * PB**T */
+
+/*           Bidiagonalize A */
+/*           (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB) */
+
+	    id = 1;
+	    ie = id + *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    itemp = itaup + *m;
+	    i__2 = *lwork - itemp + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[
+		    itauq], &work[itaup], &work[itemp], &i__2, info);
+
+/*           Solve eigenvalue problem TGK*Z=Z*S. */
+/*           (Workspace: need 2*M*M+14*M) */
+
+	    itgkz = itemp;
+	    itemp = itgkz + *m * ((*m << 1) + 1);
+	    i__2 = *m << 1;
+	    dbdsvdx_("L", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, &
+		    iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[
+		    itemp], &iwork[1], info);
+
+/*           If needed, compute left singular vectors (status goes to IERR so INFO from DBDSVDX is kept). */
+
+	    if (wantu) {
+		j = itgkz;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1);
+		    j += *m << 1;
+		}
+
+/*              Call DORMBR to compute QB*UB. */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[itemp], &i__2, &ierr);
+	    }
+
+/*           If needed, compute right singular vectors. */
+
+	    if (wantvt) {
+		j = itgkz + *m;
+		i__2 = *ns;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    dcopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt);
+		    j += *m << 1;
+		}
+		i__2 = *n - *m;
+		dlaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * 
+			vt_dim1 + 1], ldvt);
+
+/*              Call DORMBR to compute VB**T * PB**T */
+/*              (Workspace in WORK( ITEMP ): need M, prefer M*NB) */
+
+		i__2 = *lwork - itemp + 1;
+		dormbr_("P", "R", "T", ns, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, 
+			&ierr);
+	    }
+	}
+    }
+
+/*     Undo scaling if necessary (status goes to IERR so INFO from DBDSVDX is kept) */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (doublereal) maxwrk;
+
+    return 0;
+
+/*     End of DGESVDX */
+
+} /* dgesvdx_ */
+
diff --git a/lapack-netlib/SRC/dgesvj.c b/lapack-netlib/SRC/dgesvj.c
new file mode 100644
index 000000000..a49531426
--- /dev/null
+++ b/lapack-netlib/SRC/dgesvj.c
@@ -0,0 +1,2233 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x raised to the integer power n (float base) */
+	float pow=1.0; unsigned long int u; /* pow is returned unchanged (1.0) when n == 0 */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with 1/x and -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* returns x raised to the integer power n (double base) */
+	double pow=1.0; unsigned long int u; /* pow is returned unchanged (1.0) when n == 0 */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with 1/x and -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns x raised to the integer power n (complex float base) */
+	_Complex float pow=1.0; unsigned long int u; /* pow is returned unchanged (1.0) when n == 0 */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with 1/x and -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns x raised to the integer power n (complex double base) */
+	_Complex double pow=1.0; unsigned long int u; /* pow is returned unchanged (1.0) when n == 0 */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: work with 1/x and -n */
+		for(u = n; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* returns x raised to the integer power n, as an integer */
+	integer pow; unsigned long int u;
+	if (n <= 0) { /* non-positive exponents are mostly resolved here without looping */
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* negative n: result is 0 unless x == 0, where the integer division 1/0 is evaluated */
+		else n = -n; /* x == -1: fall through to the loop with the negated exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* the second operand is true only for x == -1 with n != 0 */
+		u = n;
+		for(pow = 1; ; ) { /* square-and-multiply over the bits of u, lowest bit first */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* locates the largest of w[s-1..e-1]; n is not used */
+{
+	double m; integer i, mi; /* m: largest value seen so far, mi: its 1-based position in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* starts from position s, so w[s-1] is read even when e < s */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first occurrence of the maximum */
+	return mi-s+1; /* position counted from s, starting at 1 */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* locates the largest of w[s-1..e-1]; n is not used */
+{
+	float m; integer i, mi; /* m: largest value seen so far, mi: its 1-based position in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* starts from position s, so w[s-1] is read even when e < s */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first occurrence of the maximum */
+	return mi-s+1; /* position counted from s, starting at 1 */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* stores sum_i conj(x_i) * y_i in *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; stays zero when n <= 0 */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i), unit strides */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same sum with strides incx/incy; NOTE(review): a negative increment indexes before x/y here - confirm callers only pass positive strides */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* stores sum_i conj(x_i) * y_i in *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; stays zero when n <= 0 */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i), unit strides */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same sum with strides incx/incy; NOTE(review): a negative increment indexes before x/y here - confirm callers only pass positive strides */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* stores sum_i x_i * y_i (no conjugation) in *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; stays zero when n <= 0 */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), unit strides, x is NOT conjugated */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same unconjugated sum with strides incx/incy; NOTE(review): a negative increment indexes before x/y here - confirm callers only pass positive strides */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* stores sum_i x_i * y_i (no conjugation) in *z */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; stays zero when n <= 0 */
+	if (incx == 1 && incy == 1) {
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i), unit strides, x is NOT conjugated */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		for (i=0;i<n;i++) { /* same unconjugated sum with strides incx/incy; NOTE(review): a negative increment indexes before x/y here - confirm callers only pass positive strides */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static doublereal c_b17 = 0.;
+static doublereal c_b18 = 1.;
+static integer c__1 = 1;
+static integer c__0 = 0;
+static integer c__2 = 2;
+
+/* > \brief \b DGESVJ */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESVJ + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvj.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvj.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvj.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, */
+/*                          LDV, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LDV, LWORK, M, MV, N */
+/*       CHARACTER*1        JOBA, JOBU, JOBV */
+/*       DOUBLE PRECISION   A( LDA, * ), SVA( N ), V( LDV, * ), */
+/*      $                   WORK( LWORK ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGESVJ computes the singular value decomposition (SVD) of a real */
+/* > M-by-N matrix A, where M >= N. The SVD of A is written as */
+/* >                                    [++]   [xx]   [x0]   [xx] */
+/* >              A = U * SIGMA * V^t,  [++] = [xx] * [ox] * [xx] */
+/* >                                    [++]   [xx] */
+/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */
+/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */
+/* > of SIGMA are the singular values of A. The columns of U and V are the */
+/* > left and the right singular vectors of A, respectively. */
+/* > DGESVJ can sometimes compute tiny singular values and their singular vectors much */
+/* > more accurately than other SVD routines, see below under Further Details. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] JOBA */
+/* > \verbatim */
+/* >          JOBA is CHARACTER*1 */
+/* >          Specifies the structure of A. */
+/* >          = 'L': The input matrix A is lower triangular; */
+/* >          = 'U': The input matrix A is upper triangular; */
+/* >          = 'G': The input matrix A is general M-by-N matrix, M >= N. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBU */
+/* > \verbatim */
+/* >          JOBU is CHARACTER*1 */
+/* >          Specifies whether to compute the left singular vectors */
+/* >          (columns of U): */
+/* >          = 'U': The left singular vectors corresponding to the nonzero */
+/* >                 singular values are computed and returned in the leading */
+/* >                 columns of A. See more details in the description of A. */
+/* >                 The default numerical orthogonality threshold is set to */
+/* >                 approximately TOL=CTOL*EPS, CTOL=DSQRT(M), EPS=DLAMCH('E'). */
+/* >          = 'C': Analogous to JOBU='U', except that user can control the */
+/* >                 level of numerical orthogonality of the computed left */
+/* >                 singular vectors. TOL can be set to TOL = CTOL*EPS, where */
+/* >                 CTOL is given on input in the array WORK. */
+/* >                 No CTOL smaller than ONE is allowed. CTOL greater */
+/* >                 than 1 / EPS is meaningless. The option 'C' */
+/* >                 can be used if M*EPS is satisfactory orthogonality */
+/* >                 of the computed left singular vectors, so CTOL=M could */
+/* >                 save few sweeps of Jacobi rotations. */
+/* >                 See the descriptions of A and WORK(1). */
+/* >          = 'N': The matrix U is not computed. However, see the */
+/* >                 description of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] JOBV */
+/* > \verbatim */
+/* >          JOBV is CHARACTER*1 */
+/* >          Specifies whether to compute the right singular vectors, that */
+/* >          is, the matrix V: */
+/* >          = 'V':  the matrix V is computed and returned in the array V */
+/* >          = 'A':  the Jacobi rotations are applied to the MV-by-N */
+/* >                  array V. In other words, the right singular vector */
+/* >                  matrix V is not computed explicitly, instead it is */
+/* >                  applied to an MV-by-N matrix initially stored in the */
+/* >                  first MV rows of V. */
+/* >          = 'N':  the matrix V is not computed and the array V is not */
+/* >                  referenced */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the input matrix A. */
+/* >          M >= N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit : */
+/* >          If JOBU = 'U' .OR. JOBU = 'C' : */
+/* >                 If INFO = 0 : */
+/* >                 RANKA orthonormal columns of U are returned in the */
+/* >                 leading RANKA columns of the array A. Here RANKA <= N */
+/* >                 is the number of computed singular values of A that are */
+/* >                 above the underflow threshold DLAMCH('S'). The singular */
+/* >                 vectors corresponding to underflowed or zero singular */
+/* >                 values are not computed. The value of RANKA is returned */
+/* >                 in the array WORK as RANKA=NINT(WORK(2)). Also see the */
+/* >                 descriptions of SVA and WORK. The computed columns of U */
+/* >                 are mutually numerically orthogonal up to approximately */
+/* >                 TOL=DSQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'), */
+/* >                 see the description of JOBU. */
+/* >                 If INFO > 0 : */
+/* >                 the procedure DGESVJ did not converge in the given number */
+/* >                 of iterations (sweeps). In that case, the computed */
+/* >                 columns of U may not be orthogonal up to TOL. The output */
+/* >                 U (stored in A), SIGMA (given by the computed singular */
+/* >                 values in SVA(1:N)) and V is still a decomposition of the */
+/* >                 input matrix A in the sense that the residual */
+/* >                 ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. */
+/* > */
+/* >          If JOBU = 'N' : */
+/* >                 If INFO = 0 : */
+/* >                 Note that the left singular vectors are 'for free' in the */
+/* >                 one-sided Jacobi SVD algorithm. However, if only the */
+/* >                 singular values are needed, the level of numerical */
+/* >                 orthogonality of U is not an issue and iterations are */
+/* >                 stopped when the columns of the iterated matrix are */
+/* >                 numerically orthogonal up to approximately M*EPS. Thus, */
+/* >                 on exit, A contains the columns of U scaled with the */
+/* >                 corresponding singular values. */
+/* >                 If INFO > 0 : */
+/* >                 the procedure DGESVJ did not converge in the given number */
+/* >                 of iterations (sweeps). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] SVA */
+/* > \verbatim */
+/* >          SVA is DOUBLE PRECISION array, dimension (N) */
+/* >          On exit : */
+/* >          If INFO = 0 : */
+/* >          depending on the value SCALE = WORK(1), we have: */
+/* >                 If SCALE = ONE : */
+/* >                 SVA(1:N) contains the computed singular values of A. */
+/* >                 During the computation SVA contains the Euclidean column */
+/* >                 norms of the iterated matrices in the array A. */
+/* >                 If SCALE .NE. ONE : */
+/* >                 The singular values of A are SCALE*SVA(1:N), and this */
+/* >                 factored representation is due to the fact that some of the */
+/* >                 singular values of A might underflow or overflow. */
+/* >          If INFO > 0 : */
+/* >          the procedure DGESVJ did not converge in the given number of */
+/* >          iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] MV */
+/* > \verbatim */
+/* >          MV is INTEGER */
+/* >          If JOBV = 'A', then the product of Jacobi rotations in DGESVJ */
+/* >          is applied to the first MV rows of V. See the description of JOBV. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] V */
+/* > \verbatim */
+/* >          V is DOUBLE PRECISION array, dimension (LDV,N) */
+/* >          If JOBV = 'V', then V contains on exit the N-by-N matrix of */
+/* >                         the right singular vectors; */
+/* >          If JOBV = 'A', then V contains the product of the computed right */
+/* >                         singular vector matrix and the initial matrix in */
+/* >                         the array V. */
+/* >          If JOBV = 'N', then V is not referenced. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDV */
+/* > \verbatim */
+/* >          LDV is INTEGER */
+/* >          The leading dimension of the array V, LDV >= 1. */
+/* >          If JOBV = 'V', then LDV >= f2cmax(1,N). */
+/* >          If JOBV = 'A', then LDV >= f2cmax(1,MV) . */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (LWORK) */
+/* >          On entry : */
+/* >          If JOBU = 'C' : */
+/* >          WORK(1) = CTOL, where CTOL defines the threshold for convergence. */
+/* >                    The process stops if all columns of A are mutually */
+/* >                    orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). */
+/* >                    It is required that CTOL > ONE (WORK(1) <= ONE is */
+/* >                    rejected with INFO = -12), i.e. it is not allowed to */
+/* >                    force the routine to obtain orthogonality below EPS. */
+/* >          On exit : */
+/* >          WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */
+/* >                    are the computed singular values of A. */
+/* >                    (See description of SVA().) */
+/* >          WORK(2) = NINT(WORK(2)) is the number of the computed nonzero */
+/* >                    singular values. */
+/* >          WORK(3) = NINT(WORK(3)) is the number of the computed singular */
+/* >                    values that are larger than the underflow threshold. */
+/* >          WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi */
+/* >                    rotations needed for numerical convergence. */
+/* >          WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */
+/* >                    This is useful information in cases when DGESVJ did */
+/* >                    not converge, as it can be used to estimate whether */
+/* >                    the output is still useful and for post festum analysis. */
+/* >          WORK(6) = the largest absolute value over all sines of the */
+/* >                    Jacobi rotation angles in the last sweep. It can be */
+/* >                    useful for a post festum analysis. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          Length of WORK. LWORK >= MAX(6,M+N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit. */
+/* >          < 0:  if INFO = -i, then the i-th argument had an illegal value */
+/* >          > 0:  DGESVJ did not converge in the maximal allowed number (30) */
+/* >                of sweeps. The output may still be useful. See the */
+/* >                description of WORK. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup doubleGEcomputational */
+
+/* > \par Further Details: */
+/*  ===================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */
+/* >  rotations. The rotations are implemented as fast scaled rotations of */
+/* >  Anda and Park [1]. In the case of underflow of the Jacobi angle, a */
+/* >  modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses */
+/* >  column interchanges of de Rijk [2]. The relative accuracy of the computed */
+/* >  singular values and the accuracy of the computed singular vectors (in */
+/* >  angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. */
+/* >  The condition number that determines the accuracy in the full rank case */
+/* >  is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */
+/* >  spectral condition number. The best performance of this Jacobi SVD */
+/* >  procedure is achieved if used in an  accelerated version of Drmac and */
+/* >  Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. */
+/* >  Some tuning parameters (marked with [TP]) are available for the */
+/* >  implementer. */
+/* >  The computational range for the nonzero singular values is the  machine */
+/* >  number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */
+/* >  denormalized singular values can be computed with the corresponding */
+/* >  gradual loss of accurate digits. */
+/* > \endverbatim */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* > \verbatim */
+/* > */
+/* >  ============ */
+/* > */
+/* >  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */
+/* > \endverbatim */
+
+/* > \par References: */
+/*  ================ */
+/* > */
+/* > \verbatim */
+/* > */
+/* > [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. */
+/* >     SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. */
+/* > [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */
+/* >     singular value decomposition on a vector computer. */
+/* >     SIAM J. Sci. Stat. Comp., Vol. 10 (1989), pp. 359-371. */
+/* > [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. */
+/* > [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular */
+/* >     value computation in floating point arithmetic. */
+/* >     SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. */
+/* > [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */
+/* >     SIAM J. Matrix Anal. Appl. Vol. 29, No. 4 (2008), pp. 1322-1342. */
+/* >     LAPACK Working note 169. */
+/* > [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */
+/* >     SIAM J. Matrix Anal. Appl. Vol. 29, No. 4 (2008), pp. 1343-1362. */
+/* >     LAPACK Working note 170. */
+/* > [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */
+/* >     QSVD, (H,K)-SVD computations. */
+/* >     Department of Mathematics, University of Zagreb, 2008. */
+/* > \endverbatim */
+
+/* >  \par Bugs, examples and comments: */
+/*   ================================= */
+/* > */
+/* > \verbatim */
+/* >  =========================== */
+/* >  Please report all bugs and send interesting test examples and comments to */
+/* >  drmac@math.hr. Thank you. */
+/* > \endverbatim */
+/* > */
+/*  ===================================================================== */
+/* Subroutine */ int dgesvj_(char *joba, char *jobu, char *jobv, integer *m, 
+	integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv,
+	 doublereal *v, integer *ldv, doublereal *work, integer *lwork, 
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal aapp, aapq, aaqq;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    doublereal ctol;
+    integer ierr;
+    doublereal bigtheta;
+    integer pskipped;
+    doublereal aapp0;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    doublereal temp1;
+    integer i__, p, q;
+    doublereal t;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    doublereal large, apoaq, aqoap;
+    extern logical lsame_(char *, char *);
+    doublereal theta, small, sfmin;
+    logical lsvec;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    doublereal fastr[5];
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    doublereal epsln;
+    logical applv, rsvec;
+    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *);
+    logical uctol;
+    extern /* Subroutine */ int drotm_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *);
+    logical lower, upper, rotok;
+    integer n2, n4;
+    extern /* Subroutine */ int dgsvj0_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, integer *), dgsvj1_(
+	    char *, integer *, integer *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     integer *, integer *);
+    doublereal rootsfmin;
+    integer n34;
+    doublereal cs;
+    extern doublereal dlamch_(char *);
+    doublereal sn;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlaset_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, doublereal *, integer *), 
+	    xerbla_(char *, integer *, ftnlen);
+    integer ijblsk, swband, blskip;
+    doublereal mxaapq;
+    extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *);
+    doublereal thsign, mxsinj;
+    integer ir1, emptsw, notrot, iswrot, jbc;
+    doublereal big;
+    integer kbl, lkahead, igl, ibr, jgl, nbl;
+    doublereal skl;
+    logical goscale;
+    doublereal tol;
+    integer mvl;
+    logical noscale;
+    doublereal rootbig, rooteps;
+    integer rowskip;
+    doublereal roottol;
+
+
+/*  -- LAPACK computational routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+
+
+/*  ===================================================================== */
+
+/*     from BLAS */
+/*     from LAPACK */
+/*     from BLAS */
+/*     from LAPACK */
+
+
+/*     Test the input arguments */
+
+    /* Parameter adjustments */
+    --sva;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1 * 1;
+    v -= v_offset;
+    --work;
+
+    /* Function Body */
+    lsvec = lsame_(jobu, "U");
+    uctol = lsame_(jobu, "C");
+    rsvec = lsame_(jobv, "V");
+    applv = lsame_(jobv, "A");
+    upper = lsame_(joba, "U");
+    lower = lsame_(joba, "L");
+
+    if (! (upper || lower || lsame_(joba, "G"))) {
+	*info = -1;
+    } else if (! (lsvec || uctol || lsame_(jobu, "N"))) 
+	    {
+	*info = -2;
+    } else if (! (rsvec || applv || lsame_(jobv, "N"))) 
+	    {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0 || *n > *m) {
+	*info = -5;
+    } else if (*lda < *m) {
+	*info = -7;
+    } else if (*mv < 0) {
+	*info = -9;
+    } else if (rsvec && *ldv < *n || applv && *ldv < *mv) {
+	*info = -11;
+    } else if (uctol && work[1] <= 1.) {
+	*info = -12;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = *m + *n;
+	if (*lwork < f2cmax(i__1,6)) {
+	    *info = -13;
+	} else {
+	    *info = 0;
+	}
+    }
+
+/*     #:( */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGESVJ", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/* #:) Quick return for void matrix */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Set numerical parameters */
+/*     The stopping criterion for Jacobi rotations is */
+
+/*     max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS */
+
+/*     where EPS is the round-off and CTOL is defined as follows: */
+
+    if (uctol) {
+/*        ... user controlled */
+	ctol = work[1];
+    } else {
+/*        ... default */
+	if (lsvec || rsvec || applv) {
+	    ctol = sqrt((doublereal) (*m));
+	} else {
+	    ctol = (doublereal) (*m);
+	}
+    }
+/*     ... and the machine dependent parameters are */
+/* [!]  (Make sure that DLAMCH() works properly on the target machine.) */
+
+    epsln = dlamch_("Epsilon");
+    rooteps = sqrt(epsln);
+    sfmin = dlamch_("SafeMinimum");
+    rootsfmin = sqrt(sfmin);
+    small = sfmin / epsln;
+    big = dlamch_("Overflow");
+/*     BIG         = ONE    / SFMIN */
+    rootbig = 1. / rootsfmin;
+    large = big / sqrt((doublereal) (*m * *n));
+    bigtheta = 1. / rooteps;
+
+    tol = ctol * epsln;
+    roottol = sqrt(tol);
+
+    if ((doublereal) (*m) * epsln >= 1.) {
+	*info = -4;
+	i__1 = -(*info);
+	xerbla_("DGESVJ", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Initialize the right singular vector matrix. */
+
+    if (rsvec) {
+	mvl = *n;
+	dlaset_("A", &mvl, n, &c_b17, &c_b18, &v[v_offset], ldv);
+    } else if (applv) {
+	mvl = *mv;
+    }
+    rsvec = rsvec || applv;
+
+/*     Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */
+/* (!)  If necessary, scale A to protect the largest singular value */
+/*     from overflow. It is possible that saving the largest singular */
+/*     value destroys the information about the small ones. */
+/*     This initial scaling is almost minimal in the sense that the */
+/*     goal is to make sure that no column norm overflows, and that */
+/*     DSQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */
+/*     in A are detected, the procedure returns with INFO=-6. */
+
+    skl = 1. / sqrt((doublereal) (*m) * (doublereal) (*n));
+    noscale = TRUE_;
+    goscale = TRUE_;
+
+    if (lower) {
+/*        the input matrix is M-by-N lower triangular (trapezoidal) */
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    aapp = 0.;
+	    aaqq = 1.;
+	    i__2 = *m - p + 1;
+	    dlassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq);
+	    if (aapp > big) {
+		*info = -6;
+		i__2 = -(*info);
+		xerbla_("DGESVJ", &i__2, (ftnlen)6);
+		return 0;
+	    }
+	    aaqq = sqrt(aaqq);
+	    if (aapp < big / aaqq && noscale) {
+		sva[p] = aapp * aaqq;
+	    } else {
+		noscale = FALSE_;
+		sva[p] = aapp * (aaqq * skl);
+		if (goscale) {
+		    goscale = FALSE_;
+		    i__2 = p - 1;
+		    for (q = 1; q <= i__2; ++q) {
+			sva[q] *= skl;
+/* L1873: */
+		    }
+		}
+	    }
+/* L1874: */
+	}
+    } else if (upper) {
+/*        the input matrix is M-by-N upper triangular (trapezoidal) */
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    aapp = 0.;
+	    aaqq = 1.;
+	    dlassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
+	    if (aapp > big) {
+		*info = -6;
+		i__2 = -(*info);
+		xerbla_("DGESVJ", &i__2, (ftnlen)6);
+		return 0;
+	    }
+	    aaqq = sqrt(aaqq);
+	    if (aapp < big / aaqq && noscale) {
+		sva[p] = aapp * aaqq;
+	    } else {
+		noscale = FALSE_;
+		sva[p] = aapp * (aaqq * skl);
+		if (goscale) {
+		    goscale = FALSE_;
+		    i__2 = p - 1;
+		    for (q = 1; q <= i__2; ++q) {
+			sva[q] *= skl;
+/* L2873: */
+		    }
+		}
+	    }
+/* L2874: */
+	}
+    } else {
+/*        the input matrix is M-by-N general dense */
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    aapp = 0.;
+	    aaqq = 1.;
+	    dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
+	    if (aapp > big) {
+		*info = -6;
+		i__2 = -(*info);
+		xerbla_("DGESVJ", &i__2, (ftnlen)6);
+		return 0;
+	    }
+	    aaqq = sqrt(aaqq);
+	    if (aapp < big / aaqq && noscale) {
+		sva[p] = aapp * aaqq;
+	    } else {
+		noscale = FALSE_;
+		sva[p] = aapp * (aaqq * skl);
+		if (goscale) {
+		    goscale = FALSE_;
+		    i__2 = p - 1;
+		    for (q = 1; q <= i__2; ++q) {
+			sva[q] *= skl;
+/* L3873: */
+		    }
+		}
+	    }
+/* L3874: */
+	}
+    }
+
+    if (noscale) {
+	skl = 1.;
+    }
+
+/*     Move the smaller part of the spectrum from the underflow threshold */
+/* (!)  Start by determining the position of the nonzero entries of the */
+/*     array SVA() relative to ( SFMIN, BIG ). */
+
+    aapp = 0.;
+    aaqq = big;
+    i__1 = *n;
+    for (p = 1; p <= i__1; ++p) {
+	if (sva[p] != 0.) {
+/* Computing MIN */
+	    d__1 = aaqq, d__2 = sva[p];
+	    aaqq = f2cmin(d__1,d__2);
+	}
+/* Computing MAX */
+	d__1 = aapp, d__2 = sva[p];
+	aapp = f2cmax(d__1,d__2);
+/* L4781: */
+    }
+
+/* #:) Quick return for zero matrix */
+
+    if (aapp == 0.) {
+	if (lsvec) {
+	    dlaset_("G", m, n, &c_b17, &c_b18, &a[a_offset], lda);
+	}
+	work[1] = 1.;
+	work[2] = 0.;
+	work[3] = 0.;
+	work[4] = 0.;
+	work[5] = 0.;
+	work[6] = 0.;
+	return 0;
+    }
+
+/* #:) Quick return for one-column matrix */
+
+    if (*n == 1) {
+	if (lsvec) {
+	    dlascl_("G", &c__0, &c__0, &sva[1], &skl, m, &c__1, &a[a_dim1 + 1]
+		    , lda, &ierr);
+	}
+	work[1] = 1. / skl;
+	if (sva[1] >= sfmin) {
+	    work[2] = 1.;
+	} else {
+	    work[2] = 0.;
+	}
+	work[3] = 0.;
+	work[4] = 0.;
+	work[5] = 0.;
+	work[6] = 0.;
+	return 0;
+    }
+
+/*     Protect small singular values from underflow, and try to */
+/*     avoid underflows/overflows in computing Jacobi rotations. */
+
+    sn = sqrt(sfmin / epsln);
+    temp1 = sqrt(big / (doublereal) (*n));
+    if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) {
+/* Computing MIN */
+	d__1 = big, d__2 = temp1 / aapp;
+	temp1 = f2cmin(d__1,d__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else if (aaqq <= sn && aapp <= temp1) {
+/* Computing MIN */
+	d__1 = sn / aaqq, d__2 = big / (aapp * sqrt((doublereal) (*n)));
+	temp1 = f2cmin(d__1,d__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else if (aaqq >= sn && aapp >= temp1) {
+/* Computing MAX */
+	d__1 = sn / aaqq, d__2 = temp1 / aapp;
+	temp1 = f2cmax(d__1,d__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else if (aaqq <= sn && aapp >= temp1) {
+/* Computing MIN */
+	d__1 = sn / aaqq, d__2 = big / (sqrt((doublereal) (*n)) * aapp);
+	temp1 = f2cmin(d__1,d__2);
+/*         AAQQ  = AAQQ*TEMP1 */
+/*         AAPP  = AAPP*TEMP1 */
+    } else {
+	temp1 = 1.;
+    }
+
+/*     Scale, if necessary */
+
+    if (temp1 != 1.) {
+	dlascl_("G", &c__0, &c__0, &c_b18, &temp1, n, &c__1, &sva[1], n, &
+		ierr);
+    }
+    skl = temp1 * skl;
+    if (skl != 1.) {
+	dlascl_(joba, &c__0, &c__0, &c_b18, &skl, m, n, &a[a_offset], lda, &
+		ierr);
+	skl = 1. / skl;
+    }
+
+/*     Row-cyclic Jacobi SVD algorithm with column pivoting */
+
+    emptsw = *n * (*n - 1) / 2;
+    notrot = 0;
+    fastr[0] = 0.;
+
+/*     A is represented in factored form A = A * diag(WORK), where diag(WORK) */
+/*     is initialized to identity. WORK is updated during fast scaled */
+/*     rotations. */
+
+    i__1 = *n;
+    for (q = 1; q <= i__1; ++q) {
+	work[q] = 1.;
+/* L1868: */
+    }
+
+
+    swband = 3;
+/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */
+/*     if DGESVJ is used as a computational routine in the preconditioned */
+/*     Jacobi SVD algorithm DGEJSV. For sweeps i=1:SWBAND the procedure */
+/*     works on pivots inside a band-like region around the diagonal. */
+/*     The boundaries are determined dynamically, based on the number of */
+/*     pivots above a threshold. */
+
+    kbl = f2cmin(8,*n);
+/* [TP] KBL is a tuning parameter that defines the tile size in the */
+/*     tiling of the p-q loops of pivot pairs. In general, an optimal */
+/*     value of KBL depends on the matrix dimensions and on the */
+/*     parameters of the computer's memory. */
+
+    nbl = *n / kbl;
+    if (nbl * kbl != *n) {
+	++nbl;
+    }
+
+/* Computing 2nd power */
+    i__1 = kbl;
+    blskip = i__1 * i__1;
+/* [TP] BLSKIP is a tuning parameter that depends on SWBAND and KBL. */
+
+    rowskip = f2cmin(5,kbl);
+/* [TP] ROWSKIP is a tuning parameter. */
+
+    lkahead = 1;
+/* [TP] LKAHEAD is a tuning parameter. */
+
+/*     Quasi block transformations, using the lower (upper) triangular */
+/*     structure of the input matrix. The quasi-block-cycling usually */
+/*     invokes cubic convergence. Big part of this cycle is done inside */
+/*     canonical subspaces of dimensions less than M. */
+
+/* Computing MAX */
+    i__1 = 64, i__2 = kbl << 2;
+    if ((lower || upper) && *n > f2cmax(i__1,i__2)) {
+/* [TP] The number of partition levels and the actual partition are */
+/*     tuning parameters. */
+	n4 = *n / 4;
+	n2 = *n / 2;
+	n34 = n4 * 3;
+	if (applv) {
+	    q = 0;
+	} else {
+	    q = 1;
+	}
+
+	if (lower) {
+
+/*     This works very well on lower triangular matrices, in particular */
+/*     in the framework of the preconditioned Jacobi SVD (xGEJSV). */
+/*     The idea is simple: */
+/*     [+ 0 0 0]   Note that Jacobi transformations of [0 0] */
+/*     [+ + 0 0]                                       [0 0] */
+/*     [+ + x 0]   actually work on [x 0]              [x 0] */
+/*     [+ + x x]                    [x x].             [x x] */
+
+	    i__1 = *m - n34;
+	    i__2 = *n - n34;
+	    i__3 = *lwork - *n;
+	    dgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda,
+		     &work[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + (
+		    n34 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &
+		    work[*n + 1], &i__3, &ierr);
+
+	    i__1 = *m - n2;
+	    i__2 = n34 - n2;
+	    i__3 = *lwork - *n;
+	    dgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, &
+		    work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1)
+		     * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &work[*n + 
+		    1], &i__3, &ierr);
+
+	    i__1 = *m - n2;
+	    i__2 = *n - n2;
+	    i__3 = *lwork - *n;
+	    dgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], 
+		    lda, &work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (
+		    n2 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &
+		    work[*n + 1], &i__3, &ierr);
+
+	    i__1 = *m - n4;
+	    i__2 = n2 - n4;
+	    i__3 = *lwork - *n;
+	    dgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, &
+		    work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1)
+		     * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 
+		    1], &i__3, &ierr);
+
+	    i__1 = *lwork - *n;
+	    dgsvj0_(jobv, m, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, 
+		    &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n 
+		    + 1], &i__1, &ierr);
+
+	    i__1 = *lwork - *n;
+	    dgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], &
+		    mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &
+		    work[*n + 1], &i__1, &ierr);
+
+
+	} else if (upper) {
+
+
+	    i__1 = *lwork - *n;
+	    dgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &work[1], &sva[1], &
+		    mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__2, &
+		    work[*n + 1], &i__1, &ierr);
+
+	    i__1 = *lwork - *n;
+	    dgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &work[n4 
+		    + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * 
+		    v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], 
+		    &i__1, &ierr);
+
+	    i__1 = *lwork - *n;
+	    dgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1],
+		     &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &
+		    work[*n + 1], &i__1, &ierr);
+
+	    i__1 = n2 + n4;
+	    i__2 = *lwork - *n;
+	    dgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &work[
+		    n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * 
+		    v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], 
+		    &i__2, &ierr);
+	}
+
+    }
+
+
+    for (i__ = 1; i__ <= 30; ++i__) {
+
+
+	mxaapq = 0.;
+	mxsinj = 0.;
+	iswrot = 0;
+
+	notrot = 0;
+	pskipped = 0;
+
+/*     Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */
+/*     1 <= p < q <= N. This is the first step toward a blocked implementation */
+/*     of the rotations. New implementation, based on block transformations, */
+/*     is under development. */
+
+	i__1 = nbl;
+	for (ibr = 1; ibr <= i__1; ++ibr) {
+
+	    igl = (ibr - 1) * kbl + 1;
+
+/* Computing MIN */
+	    i__3 = lkahead, i__4 = nbl - ibr;
+	    i__2 = f2cmin(i__3,i__4);
+	    for (ir1 = 0; ir1 <= i__2; ++ir1) {
+
+		igl += ir1 * kbl;
+
+/* Computing MIN */
+		i__4 = igl + kbl - 1, i__5 = *n - 1;
+		i__3 = f2cmin(i__4,i__5);
+		for (p = igl; p <= i__3; ++p) {
+
+
+		    i__4 = *n - p + 1;
+		    q = idamax_(&i__4, &sva[p], &c__1) + p - 1;
+		    if (p != q) {
+			dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 
+				1], &c__1);
+			if (rsvec) {
+			    dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				    v_dim1 + 1], &c__1);
+			}
+			temp1 = sva[p];
+			sva[p] = sva[q];
+			sva[q] = temp1;
+			temp1 = work[p];
+			work[p] = work[q];
+			work[q] = temp1;
+		    }
+
+		    if (ir1 == 0) {
+
+/*        Column norms are periodically updated by explicit */
+/*        norm computation. */
+/*        Caveat: */
+/*        Unfortunately, some BLAS implementations compute DNRM2(M,A(1,p),1) */
+/*        as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to */
+/*        overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and to */
+/*        underflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). */
+/*        Hence, DNRM2 cannot be trusted, not even in the case when */
+/*        the true norm is far from the under(over)flow boundaries. */
+/*        If properly implemented DNRM2 is available, the IF-THEN-ELSE */
+/*        below should read "AAPP = DNRM2( M, A(1,p), 1 ) * WORK(p)". */
+
+			if (sva[p] < rootbig && sva[p] > rootsfmin) {
+			    sva[p] = dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * 
+				    work[p];
+			} else {
+			    temp1 = 0.;
+			    aapp = 1.;
+			    dlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, &
+				    aapp);
+			    sva[p] = temp1 * sqrt(aapp) * work[p];
+			}
+			aapp = sva[p];
+		    } else {
+			aapp = sva[p];
+		    }
+
+		    if (aapp > 0.) {
+
+			pskipped = 0;
+
+/* Computing MIN */
+			i__5 = igl + kbl - 1;
+			i__4 = f2cmin(i__5,*n);
+			for (q = p + 1; q <= i__4; ++q) {
+
+			    aaqq = sva[q];
+
+			    if (aaqq > 0.) {
+
+				aapp0 = aapp;
+				if (aaqq >= 1.) {
+				    rotok = small * aapp <= aaqq;
+				    if (aapp < big / aaqq) {
+					aapq = ddot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					dcopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					dlascl_("G", &c__0, &c__0, &aapp, &
+						work[p], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = ddot_(m, &work[*n + 1], &c__1, 
+						&a[q * a_dim1 + 1], &c__1) * 
+						work[q] / aaqq;
+				    }
+				} else {
+				    rotok = aapp <= aaqq / small;
+				    if (aapp > small / aaqq) {
+					aapq = ddot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					dcopy_(m, &a[q * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					dlascl_("G", &c__0, &c__0, &aaqq, &
+						work[q], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = ddot_(m, &work[*n + 1], &c__1, 
+						&a[p * a_dim1 + 1], &c__1) * 
+						work[p] / aapp;
+				    }
+				}
+
+/* Computing MAX */
+				d__1 = mxaapq, d__2 = abs(aapq);
+				mxaapq = f2cmax(d__1,d__2);
+
+/*        TO rotate or NOT to rotate, THAT is the question ... */
+
+				if (abs(aapq) > tol) {
+
+/* [RTD]      ROTATED = ROTATED + ONE */
+
+				    if (ir1 == 0) {
+					notrot = 0;
+					pskipped = 0;
+					++iswrot;
+				    }
+
+				    if (rotok) {
+
+					aqoap = aaqq / aapp;
+					apoaq = aapp / aaqq;
+					theta = (d__1 = aqoap - apoaq, abs(
+						d__1)) * -.5 / aapq;
+
+					if (abs(theta) > bigtheta) {
+
+					    t = .5 / theta;
+					    fastr[2] = t * work[p] / work[q];
+					    fastr[3] = -t * work[q] / work[p];
+					    drotm_(m, &a[p * a_dim1 + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1, fastr);
+					    if (rsvec) {
+			  drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				  v_dim1 + 1], &c__1, fastr);
+					    }
+/* Computing MAX */
+					    d__1 = 0., d__2 = t * apoaq * 
+						    aapq + 1.;
+					    sva[q] = aaqq * sqrt((f2cmax(d__1,
+						    d__2)));
+/* Computing MAX */
+					    d__1 = 0., d__2 = 1. - t * aqoap *
+						     aapq;
+					    aapp *= sqrt((f2cmax(d__1,d__2)));
+/* Computing MAX */
+					    d__1 = mxsinj, d__2 = abs(t);
+					    mxsinj = f2cmax(d__1,d__2);
+
+					} else {
+
+
+					    thsign = -d_sign(&c_b18, &aapq);
+					    t = 1. / (theta + thsign * sqrt(
+						    theta * theta + 1.));
+					    cs = sqrt(1. / (t * t + 1.));
+					    sn = t * cs;
+
+/* Computing MAX */
+					    d__1 = mxsinj, d__2 = abs(sn);
+					    mxsinj = f2cmax(d__1,d__2);
+/* Computing MAX */
+					    d__1 = 0., d__2 = t * apoaq * 
+						    aapq + 1.;
+					    sva[q] = aaqq * sqrt((f2cmax(d__1,
+						    d__2)));
+/* Computing MAX */
+					    d__1 = 0., d__2 = 1. - t * aqoap *
+						     aapq;
+					    aapp *= sqrt((f2cmax(d__1,d__2)));
+
+					    apoaq = work[p] / work[q];
+					    aqoap = work[q] / work[p];
+					    if (work[p] >= 1.) {
+			  if (work[q] >= 1.) {
+			      fastr[2] = t * apoaq;
+			      fastr[3] = -t * aqoap;
+			      work[p] *= cs;
+			      work[q] *= cs;
+			      drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
+				      a_dim1 + 1], &c__1, fastr);
+			      if (rsvec) {
+				  drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
+					  q * v_dim1 + 1], &c__1, fastr);
+			      }
+			  } else {
+			      d__1 = -t * aqoap;
+			      daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      d__1 = cs * sn * apoaq;
+			      daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      work[p] *= cs;
+			      work[q] /= cs;
+			      if (rsvec) {
+				  d__1 = -t * aqoap;
+				  daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+				  d__1 = cs * sn * apoaq;
+				  daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+			      }
+			  }
+					    } else {
+			  if (work[q] >= 1.) {
+			      d__1 = t * apoaq;
+			      daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      d__1 = -cs * sn * aqoap;
+			      daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      work[p] /= cs;
+			      work[q] *= cs;
+			      if (rsvec) {
+				  d__1 = t * apoaq;
+				  daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+				  d__1 = -cs * sn * aqoap;
+				  daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+			      }
+			  } else {
+			      if (work[p] >= work[q]) {
+				  d__1 = -t * aqoap;
+				  daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  d__1 = cs * sn * apoaq;
+				  daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  work[p] *= cs;
+				  work[q] /= cs;
+				  if (rsvec) {
+				      d__1 = -t * aqoap;
+				      daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				      d__1 = cs * sn * apoaq;
+				      daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				  }
+			      } else {
+				  d__1 = t * apoaq;
+				  daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  d__1 = -cs * sn * aqoap;
+				  daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  work[p] /= cs;
+				  work[q] *= cs;
+				  if (rsvec) {
+				      d__1 = t * apoaq;
+				      daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				      d__1 = -cs * sn * aqoap;
+				      daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				  }
+			      }
+			  }
+					    }
+					}
+
+				    } else {
+					dcopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					dlascl_("G", &c__0, &c__0, &aapp, &
+						c_b18, m, &c__1, &work[*n + 1]
+						, lda, &ierr);
+					dlascl_("G", &c__0, &c__0, &aaqq, &
+						c_b18, m, &c__1, &a[q * 
+						a_dim1 + 1], lda, &ierr);
+					temp1 = -aapq * work[p] / work[q];
+					daxpy_(m, &temp1, &work[*n + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1);
+					dlascl_("G", &c__0, &c__0, &c_b18, &
+						aaqq, m, &c__1, &a[q * a_dim1 
+						+ 1], lda, &ierr);
+/* Computing MAX */
+					d__1 = 0., d__2 = 1. - aapq * aapq;
+					sva[q] = aaqq * sqrt((f2cmax(d__1,d__2)))
+						;
+					mxsinj = f2cmax(mxsinj,sfmin);
+				    }
+/*           END IF ROTOK THEN ... ELSE */
+
+/*           In the case of cancellation in updating SVA(q), SVA(p) */
+/*           recompute SVA(q), SVA(p). */
+
+/* Computing 2nd power */
+				    d__1 = sva[q] / aaqq;
+				    if (d__1 * d__1 <= rooteps) {
+					if (aaqq < rootbig && aaqq > 
+						rootsfmin) {
+					    sva[q] = dnrm2_(m, &a[q * a_dim1 
+						    + 1], &c__1) * work[q];
+					} else {
+					    t = 0.;
+					    aaqq = 1.;
+					    dlassq_(m, &a[q * a_dim1 + 1], &
+						    c__1, &t, &aaqq);
+					    sva[q] = t * sqrt(aaqq) * work[q];
+					}
+				    }
+				    if (aapp / aapp0 <= rooteps) {
+					if (aapp < rootbig && aapp > 
+						rootsfmin) {
+					    aapp = dnrm2_(m, &a[p * a_dim1 + 
+						    1], &c__1) * work[p];
+					} else {
+					    t = 0.;
+					    aapp = 1.;
+					    dlassq_(m, &a[p * a_dim1 + 1], &
+						    c__1, &t, &aapp);
+					    aapp = t * sqrt(aapp) * work[p];
+					}
+					sva[p] = aapp;
+				    }
+
+				} else {
+/*        A(:,p) and A(:,q) already numerically orthogonal */
+				    if (ir1 == 0) {
+					++notrot;
+				    }
+/* [RTD]      SKIPPED  = SKIPPED  + 1 */
+				    ++pskipped;
+				}
+			    } else {
+/*        A(:,q) is zero column */
+				if (ir1 == 0) {
+				    ++notrot;
+				}
+				++pskipped;
+			    }
+
+			    if (i__ <= swband && pskipped > rowskip) {
+				if (ir1 == 0) {
+				    aapp = -aapp;
+				}
+				notrot = 0;
+				goto L2103;
+			    }
+
+/* L2002: */
+			}
+/*     END q-LOOP */
+
+L2103:
+/*     bailed out of q-loop */
+
+			sva[p] = aapp;
+
+		    } else {
+			sva[p] = aapp;
+			if (ir1 == 0 && aapp == 0.) {
+/* Computing MIN */
+			    i__4 = igl + kbl - 1;
+			    notrot = notrot + f2cmin(i__4,*n) - p;
+			}
+		    }
+
+/* L2001: */
+		}
+/*     end of the p-loop */
+/*     end of doing the block ( ibr, ibr ) */
+/* L1002: */
+	    }
+/*     end of ir1-loop */
+
+/* ... go to the off diagonal blocks */
+
+	    igl = (ibr - 1) * kbl + 1;
+
+	    i__2 = nbl;
+	    for (jbc = ibr + 1; jbc <= i__2; ++jbc) {
+
+		jgl = (jbc - 1) * kbl + 1;
+
+/*        doing the block at ( ibr, jbc ) */
+
+		ijblsk = 0;
+/* Computing MIN */
+		i__4 = igl + kbl - 1;
+		i__3 = f2cmin(i__4,*n);
+		for (p = igl; p <= i__3; ++p) {
+
+		    aapp = sva[p];
+		    if (aapp > 0.) {
+
+			pskipped = 0;
+
+/* Computing MIN */
+			i__5 = jgl + kbl - 1;
+			i__4 = f2cmin(i__5,*n);
+			for (q = jgl; q <= i__4; ++q) {
+
+			    aaqq = sva[q];
+			    if (aaqq > 0.) {
+				aapp0 = aapp;
+
+
+/*        Safe Gram matrix computation */
+
+				if (aaqq >= 1.) {
+				    if (aapp >= aaqq) {
+					rotok = small * aapp <= aaqq;
+				    } else {
+					rotok = small * aaqq <= aapp;
+				    }
+				    if (aapp < big / aaqq) {
+					aapq = ddot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					dcopy_(m, &a[p * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					dlascl_("G", &c__0, &c__0, &aapp, &
+						work[p], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = ddot_(m, &work[*n + 1], &c__1, 
+						&a[q * a_dim1 + 1], &c__1) * 
+						work[q] / aaqq;
+				    }
+				} else {
+				    if (aapp >= aaqq) {
+					rotok = aapp <= aaqq / small;
+				    } else {
+					rotok = aaqq <= aapp / small;
+				    }
+				    if (aapp > small / aaqq) {
+					aapq = ddot_(m, &a[p * a_dim1 + 1], &
+						c__1, &a[q * a_dim1 + 1], &
+						c__1) * work[p] * work[q] / 
+						aaqq / aapp;
+				    } else {
+					dcopy_(m, &a[q * a_dim1 + 1], &c__1, &
+						work[*n + 1], &c__1);
+					dlascl_("G", &c__0, &c__0, &aaqq, &
+						work[q], m, &c__1, &work[*n + 
+						1], lda, &ierr);
+					aapq = ddot_(m, &work[*n + 1], &c__1, 
+						&a[p * a_dim1 + 1], &c__1) * 
+						work[p] / aapp;
+				    }
+				}
+
+/* Computing MAX */
+				d__1 = mxaapq, d__2 = abs(aapq);
+				mxaapq = f2cmax(d__1,d__2);
+
+/*        TO rotate or NOT to rotate, THAT is the question ... */
+
+				if (abs(aapq) > tol) {
+				    notrot = 0;
+/* [RTD]      ROTATED  = ROTATED + 1 */
+				    pskipped = 0;
+				    ++iswrot;
+
+				    if (rotok) {
+
+					aqoap = aaqq / aapp;
+					apoaq = aapp / aaqq;
+					theta = (d__1 = aqoap - apoaq, abs(
+						d__1)) * -.5 / aapq;
+					if (aaqq > aapp0) {
+					    theta = -theta;
+					}
+
+					if (abs(theta) > bigtheta) {
+					    t = .5 / theta;
+					    fastr[2] = t * work[p] / work[q];
+					    fastr[3] = -t * work[q] / work[p];
+					    drotm_(m, &a[p * a_dim1 + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1, fastr);
+					    if (rsvec) {
+			  drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
+				  v_dim1 + 1], &c__1, fastr);
+					    }
+/* Computing MAX */
+					    d__1 = 0., d__2 = t * apoaq * 
+						    aapq + 1.;
+					    sva[q] = aaqq * sqrt((f2cmax(d__1,
+						    d__2)));
+/* Computing MAX */
+					    d__1 = 0., d__2 = 1. - t * aqoap *
+						     aapq;
+					    aapp *= sqrt((f2cmax(d__1,d__2)));
+/* Computing MAX */
+					    d__1 = mxsinj, d__2 = abs(t);
+					    mxsinj = f2cmax(d__1,d__2);
+					} else {
+
+
+					    thsign = -d_sign(&c_b18, &aapq);
+					    if (aaqq > aapp0) {
+			  thsign = -thsign;
+					    }
+					    t = 1. / (theta + thsign * sqrt(
+						    theta * theta + 1.));
+					    cs = sqrt(1. / (t * t + 1.));
+					    sn = t * cs;
+/* Computing MAX */
+					    d__1 = mxsinj, d__2 = abs(sn);
+					    mxsinj = f2cmax(d__1,d__2);
+/* Computing MAX */
+					    d__1 = 0., d__2 = t * apoaq * 
+						    aapq + 1.;
+					    sva[q] = aaqq * sqrt((f2cmax(d__1,
+						    d__2)));
+/* Computing MAX */
+					    d__1 = 0., d__2 = 1. - t * aqoap *
+						     aapq;
+					    aapp *= sqrt((f2cmax(d__1,d__2)));
+
+					    apoaq = work[p] / work[q];
+					    aqoap = work[q] / work[p];
+					    if (work[p] >= 1.) {
+
+			  if (work[q] >= 1.) {
+			      fastr[2] = t * apoaq;
+			      fastr[3] = -t * aqoap;
+			      work[p] *= cs;
+			      work[q] *= cs;
+			      drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
+				      a_dim1 + 1], &c__1, fastr);
+			      if (rsvec) {
+				  drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
+					  q * v_dim1 + 1], &c__1, fastr);
+			      }
+			  } else {
+			      d__1 = -t * aqoap;
+			      daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      d__1 = cs * sn * apoaq;
+			      daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  d__1 = -t * aqoap;
+				  daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+				  d__1 = cs * sn * apoaq;
+				  daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+			      }
+			      work[p] *= cs;
+			      work[q] /= cs;
+			  }
+					    } else {
+			  if (work[q] >= 1.) {
+			      d__1 = t * apoaq;
+			      daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
+				      q * a_dim1 + 1], &c__1);
+			      d__1 = -cs * sn * aqoap;
+			      daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
+				      p * a_dim1 + 1], &c__1);
+			      if (rsvec) {
+				  d__1 = t * apoaq;
+				  daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
+					  c__1, &v[q * v_dim1 + 1], &c__1);
+				  d__1 = -cs * sn * aqoap;
+				  daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
+					  c__1, &v[p * v_dim1 + 1], &c__1);
+			      }
+			      work[p] /= cs;
+			      work[q] *= cs;
+			  } else {
+			      if (work[p] >= work[q]) {
+				  d__1 = -t * aqoap;
+				  daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  d__1 = cs * sn * apoaq;
+				  daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  work[p] *= cs;
+				  work[q] /= cs;
+				  if (rsvec) {
+				      d__1 = -t * aqoap;
+				      daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				      d__1 = cs * sn * apoaq;
+				      daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				  }
+			      } else {
+				  d__1 = t * apoaq;
+				  daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
+					  &a[q * a_dim1 + 1], &c__1);
+				  d__1 = -cs * sn * aqoap;
+				  daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
+					  &a[p * a_dim1 + 1], &c__1);
+				  work[p] /= cs;
+				  work[q] *= cs;
+				  if (rsvec) {
+				      d__1 = t * apoaq;
+				      daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
+					      &c__1, &v[q * v_dim1 + 1], &
+					      c__1);
+				      d__1 = -cs * sn * aqoap;
+				      daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
+					      &c__1, &v[p * v_dim1 + 1], &
+					      c__1);
+				  }
+			      }
+			  }
+					    }
+					}
+
+				    } else {
+					if (aapp > aaqq) {
+					    dcopy_(m, &a[p * a_dim1 + 1], &
+						    c__1, &work[*n + 1], &
+						    c__1);
+					    dlascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b18, m, &c__1, &work[*
+						    n + 1], lda, &ierr);
+					    dlascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b18, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * work[p] / work[q];
+					    daxpy_(m, &temp1, &work[*n + 1], &
+						    c__1, &a[q * a_dim1 + 1], 
+						    &c__1);
+					    dlascl_("G", &c__0, &c__0, &c_b18,
+						     &aaqq, m, &c__1, &a[q * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    d__1 = 0., d__2 = 1. - aapq * 
+						    aapq;
+					    sva[q] = aaqq * sqrt((f2cmax(d__1,
+						    d__2)));
+					    mxsinj = f2cmax(mxsinj,sfmin);
+					} else {
+					    dcopy_(m, &a[q * a_dim1 + 1], &
+						    c__1, &work[*n + 1], &
+						    c__1);
+					    dlascl_("G", &c__0, &c__0, &aaqq, 
+						    &c_b18, m, &c__1, &work[*
+						    n + 1], lda, &ierr);
+					    dlascl_("G", &c__0, &c__0, &aapp, 
+						    &c_b18, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+					    temp1 = -aapq * work[q] / work[p];
+					    daxpy_(m, &temp1, &work[*n + 1], &
+						    c__1, &a[p * a_dim1 + 1], 
+						    &c__1);
+					    dlascl_("G", &c__0, &c__0, &c_b18,
+						     &aapp, m, &c__1, &a[p * 
+						    a_dim1 + 1], lda, &ierr);
+/* Computing MAX */
+					    d__1 = 0., d__2 = 1. - aapq * 
+						    aapq;
+					    sva[p] = aapp * sqrt((f2cmax(d__1,
+						    d__2)));
+					    mxsinj = f2cmax(mxsinj,sfmin);
+					}
+				    }
+/*           END IF ROTOK THEN ... ELSE */
+
+/*           In the case of cancellation in updating SVA(q) */
+/* Computing 2nd power */
+				    d__1 = sva[q] / aaqq;
+				    if (d__1 * d__1 <= rooteps) {
+					if (aaqq < rootbig && aaqq > 
+						rootsfmin) {
+					    sva[q] = dnrm2_(m, &a[q * a_dim1 
+						    + 1], &c__1) * work[q];
+					} else {
+					    t = 0.;
+					    aaqq = 1.;
+					    dlassq_(m, &a[q * a_dim1 + 1], &
+						    c__1, &t, &aaqq);
+					    sva[q] = t * sqrt(aaqq) * work[q];
+					}
+				    }
+/* Computing 2nd power */
+				    d__1 = aapp / aapp0;
+				    if (d__1 * d__1 <= rooteps) {
+					if (aapp < rootbig && aapp > 
+						rootsfmin) {
+					    aapp = dnrm2_(m, &a[p * a_dim1 + 
+						    1], &c__1) * work[p];
+					} else {
+					    t = 0.;
+					    aapp = 1.;
+					    dlassq_(m, &a[p * a_dim1 + 1], &
+						    c__1, &t, &aapp);
+					    aapp = t * sqrt(aapp) * work[p];
+					}
+					sva[p] = aapp;
+				    }
+/*              end of OK rotation */
+				} else {
+				    ++notrot;
+/* [RTD]      SKIPPED  = SKIPPED  + 1 */
+				    ++pskipped;
+				    ++ijblsk;
+				}
+			    } else {
+				++notrot;
+				++pskipped;
+				++ijblsk;
+			    }
+
+			    if (i__ <= swband && ijblsk >= blskip) {
+				sva[p] = aapp;
+				notrot = 0;
+				goto L2011;
+			    }
+			    if (i__ <= swband && pskipped > rowskip) {
+				aapp = -aapp;
+				notrot = 0;
+				goto L2203;
+			    }
+
+/* L2200: */
+			}
+/*        end of the q-loop */
+L2203:
+
+			sva[p] = aapp;
+
+		    } else {
+
+			if (aapp == 0.) {
+/* Computing MIN */
+			    i__4 = jgl + kbl - 1;
+			    notrot = notrot + f2cmin(i__4,*n) - jgl + 1;
+			}
+			if (aapp < 0.) {
+			    notrot = 0;
+			}
+
+		    }
+
+/* L2100: */
+		}
+/*     end of the p-loop */
+/* L2010: */
+	    }
+/*     end of the jbc-loop */
+L2011:
+/* 2011 bailed out of the jbc-loop */
+/* Computing MIN */
+	    i__3 = igl + kbl - 1;
+	    i__2 = f2cmin(i__3,*n);
+	    for (p = igl; p <= i__2; ++p) {
+		sva[p] = (d__1 = sva[p], abs(d__1));
+/* L2012: */
+	    }
+/* ** */
+/* L2000: */
+	}
+/* 2000 :: end of the ibr-loop */
+
+	if (sva[*n] < rootbig && sva[*n] > rootsfmin) {
+	    sva[*n] = dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * work[*n];
+	} else {
+	    t = 0.;
+	    aapp = 1.;
+	    dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp);
+	    sva[*n] = t * sqrt(aapp) * work[*n];
+	}
+
+/*     Additional steering devices */
+
+	if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) {
+	    swband = i__;
+	}
+
+	if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * tol && (
+		doublereal) (*n) * mxaapq * mxsinj < tol) {
+	    goto L1994;
+	}
+
+	if (notrot >= emptsw) {
+	    goto L1994;
+	}
+
+/* L1993: */
+    }
+/*     end i=1:NSWEEP loop */
+
+/* #:( Reaching this point means that the procedure has not converged. */
+    *info = 29;
+    goto L1995;
+
+L1994:
+/* #:) Reaching this point means numerical convergence after the i-th */
+/*     sweep. */
+
+    *info = 0;
+/* #:) INFO = 0 confirms successful iterations. */
+L1995:
+
+/*     Sort the singular values and find how many are above */
+/*     the underflow threshold. */
+
+    n2 = 0;
+    n4 = 0;
+    i__1 = *n - 1;
+    for (p = 1; p <= i__1; ++p) {
+	i__2 = *n - p + 1;
+	q = idamax_(&i__2, &sva[p], &c__1) + p - 1;
+	if (p != q) {
+	    temp1 = sva[p];
+	    sva[p] = sva[q];
+	    sva[q] = temp1;
+	    temp1 = work[p];
+	    work[p] = work[q];
+	    work[q] = temp1;
+	    dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1);
+	    if (rsvec) {
+		dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &
+			c__1);
+	    }
+	}
+	if (sva[p] != 0.) {
+	    ++n4;
+	    if (sva[p] * skl > sfmin) {
+		++n2;
+	    }
+	}
+/* L5991: */
+    }
+    if (sva[*n] != 0.) {
+	++n4;
+	if (sva[*n] * skl > sfmin) {
+	    ++n2;
+	}
+    }
+
+/*     Normalize the left singular vectors. */
+
+    if (lsvec || uctol) {
+	i__1 = n2;
+	for (p = 1; p <= i__1; ++p) {
+	    d__1 = work[p] / sva[p];
+	    dscal_(m, &d__1, &a[p * a_dim1 + 1], &c__1);
+/* L1998: */
+	}
+    }
+
+/*     Scale the product of Jacobi rotations (assemble the fast rotations). */
+
+    if (rsvec) {
+	if (applv) {
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		dscal_(&mvl, &work[p], &v[p * v_dim1 + 1], &c__1);
+/* L2398: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (p = 1; p <= i__1; ++p) {
+		temp1 = 1. / dnrm2_(&mvl, &v[p * v_dim1 + 1], &c__1);
+		dscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1);
+/* L2399: */
+	    }
+	}
+    }
+
+/*     Undo scaling, if necessary (and possible). */
+    if (skl > 1. && sva[1] < big / skl || skl < 1. && sva[f2cmax(n2,1)] > sfmin /
+	     skl) {
+	i__1 = *n;
+	for (p = 1; p <= i__1; ++p) {
+	    sva[p] = skl * sva[p];
+/* L2400: */
+	}
+	skl = 1.;
+    }
+
+    work[1] = skl;
+/*     The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE */
+/*     then some of the singular values may overflow or underflow and */
+/*     the spectrum is given in this factored representation. */
+
+    work[2] = (doublereal) n4;
+/*     N4 is the number of computed nonzero singular values of A. */
+
+    work[3] = (doublereal) n2;
+/*     N2 is the number of singular values of A greater than SFMIN. */
+/*     If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers */
+/*     that may carry some information. */
+
+    work[4] = (doublereal) i__;
+/*     i is the index of the last sweep before declaring convergence. */
+
+    work[5] = mxaapq;
+/*     MXAAPQ is the largest absolute value of scaled pivots in the */
+/*     last sweep */
+
+    work[6] = mxsinj;
+/*     MXSINJ is the largest absolute value of the sines of Jacobi angles */
+/*     in the last sweep */
+
+    return 0;
+} /* dgesvj_ */
+
diff --git a/lapack-netlib/SRC/dgesvx.c b/lapack-netlib/SRC/dgesvx.c
new file mode 100644
index 000000000..5636330ca
--- /dev/null
+++ b/lapack-netlib/SRC/dgesvx.c
@@ -0,0 +1,1068 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* integer type used for every count, dimension and index in the translated code */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex; /* single-precision complex stored as a (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex; /* double-precision complex stored as a (real, imaginary) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C99 _Complex float */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C99 _Complex double */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage viewed as _Complex float*; NOTE(review): the cast assumes the {r,i} struct and _Complex float share one layout */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same storage viewed as _Complex double*; same layout assumption as _pCf */
+#define pCf(z) (*_pCf(z)) /* assignable lvalue for the complex number that z points to */
+#define pCd(z) (*_pCd(z)) /* assignable lvalue for the double complex number that z points to */
+typedef int logical; /* truth values are held in a plain int (see TRUE_/FALSE_) */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1) /* integer constants used as truth values */
+#define FALSE_ (0)
+
+/* Extern is for use with -E; unless predefined it expands to plain `extern` */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: flag, ftnlen and ftnint are all plain int in this header */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* external read, write (none of these I/O descriptors is referenced in the code visible in this chunk) */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire: presumably each char* field is paired with the length field that follows it -- TODO confirm */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): ftnint here, ftnlen on the sibling length fields; both are int in this header */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type defined above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of Vardesc pointers */
+	char *name;
+	Vardesc **vars;	/* presumably nvars entries -- TODO confirm */
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* type-generic; evaluates x twice and takes over every later abs(...) call in this file */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* arguments are evaluated twice: keep them free of side effects */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* arguments are evaluated twice: keep them free of side effects */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): calls double-precision ccos where the sibling c_* macros use the f-suffixed functions */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x))) /* the d_* and r_* helpers take POINTERS to their operands */
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* uses conj (double) on a float complex; the result is narrowed on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) /* positive difference: a-b when a > b, otherwise 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* uses cimag (double) on a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* base-10 log as log10(e) * ln(x) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* base-10 log as log10(e) * ln(x) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* nearest integer, halves rounded away from zero; takes a VALUE */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| when b >= 0, -|a| otherwise; takes VALUES */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n) /* the length is simply the second argument; s is ignored */
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is dereferenced directly instead of going through Cd() like A -- confirm B's type at call sites */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; } /* joins *(np) pieces rpp[i] (lengths rnp[i]) into lpp, stops after llp characters and fills any remainder with blanks */
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars, stopping early at a NUL in B; writes no padding and no terminator */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; /* version tag string; not referenced in the code visible in this chunk */
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break; /* expands to a statement, so it is only meaningful inside a loop or switch */
+#define mycycle() continue; /* expands to a statement, so it is only meaningful inside a loop */
+#define myceiling(w) {ceil(w)} /* NOTE(review): expands to a brace block without a semicolon, so it cannot be used as an expression */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): same brace-block form as myceiling */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): brace block, no semicolon, result of dmaxloc_ discarded; s and e are pointers here */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...); /* pointer to a function returning logical, any arguments */
+#else
+typedef logical (*L_fp)(); /* pointer to a function returning logical, parameter list unspecified */
+#endif
+
+static float spow_ui(float x, integer n) { /* returns x raised to the integer power n (1 when n == 0) by square-and-multiply */
+	float pow=1.0; unsigned long int u; /* pow is the running product; the local name hides math.h pow() inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with |n|; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* lowest remaining exponent bit set: fold the current power of x in */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* returns x raised to the integer power n (1 when n == 0) by square-and-multiply */
+	double pow=1.0; unsigned long int u; /* pow is the running product; the local name hides math.h pow() inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use 1/x with |n|; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* lowest remaining exponent bit set: fold the current power of x in */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* returns complex x raised to the integer power n (1 when n == 0) by square-and-multiply */
+	_Complex float pow=1.0; unsigned long int u; /* pow is the running product; the local name hides math.h pow() inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the complex reciprocal with |n|; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* lowest remaining exponent bit set: fold the current power of x in */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* returns complex x raised to the integer power n (1 when n == 0) by square-and-multiply */
+	_Complex double pow=1.0; unsigned long int u; /* pow is the running product; the local name hides math.h pow() inside this function */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the complex reciprocal with |n|; NOTE(review): -n overflows for the most negative integer */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* lowest remaining exponent bit set: fold the current power of x in */
+			if(u >>= 1) x *= x; /* bits remain: square x for the next bit */
+			else break;
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* integer power x**n in integer arithmetic */
+	integer pow; unsigned long int u; /* pow is assigned on every path below before it is returned */
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1; /* anything to the power 0, and 1 to any power, is 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* n < 0: result is 0 for |x| > 1; x == 0 evaluates 1/x, an integer division by zero */
+		else n = -n; /* x == -1 with n < 0: continue below with |n| */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* second clause reduces to (n != 0 && x == -1), i.e. n still negative with x == -1 */
+		u = n;
+		for(pow = 1; ; ) { /* square-and-multiply over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the largest of w[s-1] .. w[e-1]; the parameter n is never used */
+{
+	double m; integer i, mi; /* m = largest value so far, mi = its 1-based index into w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the first of several equal maxima */
+	return mi-s+1; /* 1-based position counted from s */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* position of the largest of w[s-1] .. w[e-1]; the parameter n is never used */
+{
+	float m; integer i, mi; /* m = largest value so far, mi = its 1-based index into w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict > keeps the first of several equal maxima */
+	return mi-s+1; /* 1-based position counted from s */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* conjugated dot product: *z = sum over i < n of conj(x[i*incx]) * y[i*incy] */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment yields negative offsets from x / y here -- confirm callers pass positive strides */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through z, not as a function value */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* conjugated dot product: *z = sum over i < n of conj(x[i*incx]) * y[i*incy] */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator; stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment yields negative offsets from x / y here -- confirm callers pass positive strides */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
+			zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through z, not as a function value */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* unconjugated dot product: *z = sum over i < n of x[i*incx] * y[i*incy] */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex float zdotc = 0.0; /* accumulator (name kept from the conjugated variant); stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)  (no conjugation) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment yields negative offsets from x / y here -- confirm callers pass positive strides */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)  (no conjugation) */
+			zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
+		}
+	}
+	pCf(z) = zdotc; /* result is returned through z, not as a function value */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* unconjugated dot product: *z = sum over i < n of x[i*incx] * y[i*incy] */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	_Complex double zdotc = 0.0; /* accumulator (name kept from the conjugated variant); stays 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* both vectors contiguous */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)  (no conjugation) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else { /* general strides; NOTE(review): a negative increment yields negative offsets from x / y here -- confirm callers pass positive strides */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i)  (no conjugation) */
+			zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
+		}
+	}
+	pCd(z) = zdotc; /* result is returned through z, not as a function value */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> DGESVX computes the solution to system of linear equations A * X = B for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESVX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvx.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvx.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvx.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */
+/*                          EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, */
+/*                          WORK, IWORK, INFO ) */
+
+/*       CHARACTER          EQUED, FACT, TRANS */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS */
+/*       DOUBLE PRECISION   RCOND */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   BERR( * ), C( * ), FERR( * ), R( * ), */
+/*      $                   WORK( * ), X( LDX, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGESVX uses the LU factorization to compute the solution to a real */
+/* > system of linear equations */
+/* >    A * X = B, */
+/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */
+/* > */
+/* > Error bounds on the solution and a condition estimate are also */
+/* > provided. */
+/* > \endverbatim */
+
+/* > \par Description: */
+/*  ================= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > The following steps are performed: */
+/* > */
+/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */
+/* >    the system: */
+/* >       TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B */
+/* >       TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */
+/* >       TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */
+/* >    Whether or not the system will be equilibrated depends on the */
+/* >    scaling of the matrix A, but if equilibration is used, A is */
+/* >    overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */
+/* >    or diag(C)*B (if TRANS = 'T' or 'C'). */
+/* > */
+/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */
+/* >    matrix A (after equilibration if FACT = 'E') as */
+/* >       A = P * L * U, */
+/* >    where P is a permutation matrix, L is a unit lower triangular */
+/* >    matrix, and U is upper triangular. */
+/* > */
+/* > 3. If some U(i,i)=0, so that U is exactly singular, then the routine */
+/* >    returns with INFO = i. Otherwise, the factored form of A is used */
+/* >    to estimate the condition number of the matrix A.  If the */
+/* >    reciprocal of the condition number is less than machine precision, */
+/* >    INFO = N+1 is returned as a warning, but the routine still goes on */
+/* >    to solve for X and compute error bounds as described below. */
+/* > */
+/* > 4. The system of equations is solved for X using the factored form */
+/* >    of A. */
+/* > */
+/* > 5. Iterative refinement is applied to improve the computed solution */
+/* >    matrix and calculate error bounds and backward error estimates */
+/* >    for it. */
+/* > */
+/* > 6. If equilibration was used, the matrix X is premultiplied by */
+/* >    diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */
+/* >    that it solves the original system before equilibration. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] FACT */
+/* > \verbatim */
+/* >          FACT is CHARACTER*1 */
+/* >          Specifies whether or not the factored form of the matrix A is */
+/* >          supplied on entry, and if not, whether the matrix A should be */
+/* >          equilibrated before it is factored. */
+/* >          = 'F':  On entry, AF and IPIV contain the factored form of A. */
+/* >                  If EQUED is not 'N', the matrix A has been */
+/* >                  equilibrated with scaling factors given by R and C. */
+/* >                  A, AF, and IPIV are not modified. */
+/* >          = 'N':  The matrix A will be copied to AF and factored. */
+/* >          = 'E':  The matrix A will be equilibrated if necessary, then */
+/* >                  copied to AF and factored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B     (No transpose) */
+/* >          = 'T':  A**T * X = B  (Transpose) */
+/* >          = 'C':  A**H * X = B  (Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of linear equations, i.e., the order of the */
+/* >          matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is */
+/* >          not 'N', then A must have been equilibrated by the scaling */
+/* >          factors in R and/or C.  A is not modified if FACT = 'F' or */
+/* >          'N', or if FACT = 'E' and EQUED = 'N' on exit. */
+/* > */
+/* >          On exit, if EQUED .ne. 'N', A is scaled as follows: */
+/* >          EQUED = 'R':  A := diag(R) * A */
+/* >          EQUED = 'C':  A := A * diag(C) */
+/* >          EQUED = 'B':  A := diag(R) * A * diag(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AF */
+/* > \verbatim */
+/* >          AF is DOUBLE PRECISION array, dimension (LDAF,N) */
+/* >          If FACT = 'F', then AF is an input argument and on entry */
+/* >          contains the factors L and U from the factorization */
+/* >          A = P*L*U as computed by DGETRF.  If EQUED .ne. 'N', then */
+/* >          AF is the factored form of the equilibrated matrix A. */
+/* > */
+/* >          If FACT = 'N', then AF is an output argument and on exit */
+/* >          returns the factors L and U from the factorization A = P*L*U */
+/* >          of the original matrix A. */
+/* > */
+/* >          If FACT = 'E', then AF is an output argument and on exit */
+/* >          returns the factors L and U from the factorization A = P*L*U */
+/* >          of the equilibrated matrix A (see the description of A for */
+/* >          the form of the equilibrated matrix). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >          The leading dimension of the array AF.  LDAF >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          If FACT = 'F', then IPIV is an input argument and on entry */
+/* >          contains the pivot indices from the factorization A = P*L*U */
+/* >          as computed by DGETRF; row i of the matrix was interchanged */
+/* >          with row IPIV(i). */
+/* > */
+/* >          If FACT = 'N', then IPIV is an output argument and on exit */
+/* >          contains the pivot indices from the factorization A = P*L*U */
+/* >          of the original matrix A. */
+/* > */
+/* >          If FACT = 'E', then IPIV is an output argument and on exit */
+/* >          contains the pivot indices from the factorization A = P*L*U */
+/* >          of the equilibrated matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >          Specifies the form of equilibration that was done. */
+/* >          = 'N':  No equilibration (always true if FACT = 'N'). */
+/* >          = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >                  diag(R). */
+/* >          = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >                  by diag(C). */
+/* >          = 'B':  Both row and column equilibration, i.e., A has been */
+/* >                  replaced by diag(R) * A * diag(C). */
+/* >          EQUED is an input argument if FACT = 'F'; otherwise, it is an */
+/* >          output argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (N) */
+/* >          The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >          is not accessed.  R is an input argument if FACT = 'F'; */
+/* >          otherwise, R is an output argument.  If FACT = 'F' and */
+/* >          EQUED = 'R' or 'B', each element of R must be positive. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >          The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >          is not accessed.  C is an input argument if FACT = 'F'; */
+/* >          otherwise, C is an output argument.  If FACT = 'F' and */
+/* >          EQUED = 'C' or 'B', each element of C must be positive. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the N-by-NRHS right hand side matrix B. */
+/* >          On exit, */
+/* >          if EQUED = 'N', B is not modified; */
+/* >          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */
+/* >          diag(R)*B; */
+/* >          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */
+/* >          overwritten by diag(C)*B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */
+/* >          to the original system of equations.  Note that A and B are */
+/* >          modified on exit if EQUED .ne. 'N', and the solution to the */
+/* >          equilibrated system is inv(diag(C))*X if TRANS = 'N' and */
+/* >          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */
+/* >          and EQUED = 'R' or 'B'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >          The leading dimension of the array X.  LDX >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >          The estimate of the reciprocal condition number of the matrix */
+/* >          A after equilibration (if done).  If RCOND is less than the */
+/* >          machine precision (in particular, if RCOND = 0), the matrix */
+/* >          is singular to working precision.  This condition is */
+/* >          indicated by a return code of INFO > 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] FERR */
+/* > \verbatim */
+/* >          FERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The estimated forward error bound for each solution vector */
+/* >          X(j) (the j-th column of the solution matrix X). */
+/* >          If XTRUE is the true solution corresponding to X(j), FERR(j) */
+/* >          is an estimated upper bound for the magnitude of the largest */
+/* >          element in (X(j) - XTRUE) divided by the magnitude of the */
+/* >          largest element in X(j).  The estimate is as reliable as */
+/* >          the estimate for RCOND, and is almost always a slight */
+/* >          overestimate of the true error. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >          The componentwise relative backward error of each solution */
+/* >          vector X(j) (i.e., the smallest relative change in */
+/* >          any element of A or B that makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (4*N) */
+/* >          On exit, WORK(1) contains the reciprocal pivot growth */
+/* >          factor norm(A)/norm(U). The "max absolute element" norm is */
+/* >          used. If WORK(1) is much less than 1, then the stability */
+/* >          of the LU factorization of the (equilibrated) matrix A */
+/* >          could be poor. This also means that the solution X, condition */
+/* >          estimator RCOND, and forward error bound FERR could be */
+/* >          unreliable. If factorization fails with 0<INFO<=N, then */
+/* >          WORK(1) contains the reciprocal pivot growth factor for the */
+/* >          leading INFO columns of A. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, and i is */
+/* >                <= N:  U(i,i) is exactly zero.  The factorization has */
+/* >                       been completed, but the factor U is exactly */
+/* >                       singular, so the solution and error bounds */
+/* >                       could not be computed. RCOND = 0 is returned. */
+/* >                = N+1: U is nonsingular, but RCOND is less than machine */
+/* >                       precision, meaning that the matrix is singular */
+/* >                       to working precision.  Nevertheless, the */
+/* >                       solution and error bounds are computed because */
+/* >                       there are a number of situations where the */
+/* >                       computed solution can be more accurate than the */
+/* >                       value of RCOND would suggest. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup doubleGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine: always returns 0; status is reported through INFO */ int dgesvx_(char *fact, char *trans, integer *n, integer *
+	nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, 
+	integer *ipiv, char *equed, doublereal *r__, doublereal *c__, 
+	doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *
+	rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *
+	iwork, integer *info)
+{
+    /* System generated locals: leading dimensions, index offsets and loop/expression temporaries */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal amax;
+    char norm[1];
+    integer i__, j;
+    extern logical lsame_(char *, char *);
+    doublereal rcmin, rcmax, anorm;
+    logical equil;
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dlaqge_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, char *), dgecon_(char *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, integer *);
+    doublereal colcnd;
+    logical nofact;
+    extern /* Subroutine */ int dgeequ_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *), dgerfs_(char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    doublereal *, doublereal *, integer *, integer *), 
+	    dgetrf_(integer *, integer *, doublereal *, integer *, integer *, 
+	    integer *), dlacpy_(char *, integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *), xerbla_(char *, 
+	    integer *, ftnlen);
+    doublereal bignum;
+    extern doublereal dlantr_(char *, char *, char *, integer *, integer *, 
+	    doublereal *, integer *, doublereal *);
+    integer infequ;
+    logical colequ;
+    extern /* Subroutine */ int dgetrs_(char *, integer *, integer *, 
+	    doublereal *, integer *, integer *, doublereal *, integer *, 
+	    integer *);
+    doublereal rowcnd;
+    logical notran;
+    doublereal smlnum;
+    logical rowequ;
+    doublereal rpvgrw;
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     April 2012 */
+/*  Outline: equilibrate A when FACT = 'E', LU-factor it unless FACT = 'F', estimate the reciprocal */
+/*  condition number, solve for X, refine X, and undo the scaling; error bounds go to FERR and BERR. */
+/*  ===================================================================== */
+/*  On exit WORK(1) holds the reciprocal pivot growth factor.  INFO = i with 0 < i <= N is passed on */
+/*  from DGETRF (U(i,i) exactly zero); INFO = N+1 is set when RCOND is below DLAMCH('Epsilon'). */
+
+    /* Parameter adjustments: shift each array pointer so that 1-based Fortran indexing can be used below */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --ferr;
+    --berr;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    nofact = lsame_(fact, "N");
+    equil = lsame_(fact, "E");
+    notran = lsame_(trans, "N");
+    if (nofact || equil) {
+	*(unsigned char *)equed = 'N';	/* EQUED is written, not read, in this case */
+	rowequ = FALSE_;
+	colequ = FALSE_;
+    } else {
+	rowequ = lsame_(equed, "R") || lsame_(equed, 
+		"B");
+	colequ = lsame_(equed, "C") || lsame_(equed, 
+		"B");
+	smlnum = dlamch_("Safe minimum");	/* SMLNUM and BIGNUM are only used to validate the caller's R and C */
+	bignum = 1. / smlnum;
+    }
+
+/*     Test the input parameters. */
+
+    if (! nofact && ! equil && ! lsame_(fact, "F")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && ! 
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -8;
+    } else if (lsame_(fact, "F") && ! (rowequ || colequ 
+	    || lsame_(equed, "N"))) {
+	*info = -10;
+    } else {
+	if (rowequ) {	/* caller-supplied row scale factors: every R(j) must be positive */
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = r__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = r__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L10: */
+	    }
+	    if (rcmin <= 0.) {
+		*info = -11;
+	    } else if (*n > 0) {
+		rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		rowcnd = 1.;
+	    }
+	}
+	if (colequ && *info == 0) {	/* same check for the caller-supplied column scale factors C(j) */
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = c__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = c__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L20: */
+	    }
+	    if (rcmin <= 0.) {
+		*info = -12;
+	    } else if (*n > 0) {
+		colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum);
+	    } else {
+		colcnd = 1.;
+	    }
+	}
+	if (*info == 0) {
+	    if (*ldb < f2cmax(1,*n)) {
+		*info = -14;
+	    } else if (*ldx < f2cmax(1,*n)) {
+		*info = -16;
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGESVX", &i__1, (ftnlen)6);	/* report the position of the offending argument */
+	return 0;
+    }
+
+    if (equil) {
+
+/*        Compute row and column scalings to equilibrate the matrix A. */
+
+	dgeequ_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, &
+		amax, &infequ);
+	if (infequ == 0) {
+
+/*           Equilibrate the matrix; DLAQGE reports what it did through EQUED. */
+
+	    dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &
+		    colcnd, &amax, equed);
+	    rowequ = lsame_(equed, "R") || lsame_(equed,
+		     "B");
+	    colequ = lsame_(equed, "C") || lsame_(equed,
+		     "B");
+	}
+    }
+
+/*     Scale the right hand side: B := diag(R)*B if TRANS = 'N', otherwise B := diag(C)*B. */
+
+    if (notran) {
+	if (rowequ) {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1];
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+    } else if (colequ) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1];
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+    if (nofact || equil) {
+
+/*        Compute the LU factorization of A (in the copy AF, so A itself is kept). */
+
+	dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf);
+	dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info);
+
+/*        Return early if INFO is positive (set by DGETRF); X, FERR and BERR are not computed. */
+
+	if (*info > 0) {
+
+/*           Compute the reciprocal pivot growth factor of the */
+/*           leading rank-deficient INFO columns of A. */
+
+	    rpvgrw = dlantr_("M", "U", "N", info, info, &af[af_offset], ldaf, 
+		    &work[1]);
+	    if (rpvgrw == 0.) {
+		rpvgrw = 1.;
+	    } else {
+		rpvgrw = dlange_("M", n, info, &a[a_offset], lda, &work[1]) / rpvgrw;
+	    }
+	    work[1] = rpvgrw;
+	    *rcond = 0.;
+	    return 0;
+	}
+    }
+
+/*     Compute the norm of the matrix A and the */
+/*     reciprocal pivot growth factor RPVGRW. */
+
+    if (notran) {
+	*(unsigned char *)norm = '1';	/* norm selector handed to DLANGE and DGECON for A*X = B */
+    } else {
+	*(unsigned char *)norm = 'I';	/* norm selector used for the transposed system */
+    }
+    anorm = dlange_(norm, n, n, &a[a_offset], lda, &work[1]);
+    rpvgrw = dlantr_("M", "U", "N", n, n, &af[af_offset], ldaf, &work[1]);
+    if (rpvgrw == 0.) {
+	rpvgrw = 1.;
+    } else {
+	rpvgrw = dlange_("M", n, n, &a[a_offset], lda, &work[1]) / 
+		rpvgrw;
+    }
+
+/*     Compute the reciprocal of the condition number of A. */
+
+    dgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1],
+	     info);
+
+/*     Compute the solution matrix X (B is copied to X and solved in place). */
+
+    dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
+    dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx,
+	     info);
+
+/*     Use iterative refinement to improve the computed solution and */
+/*     compute error bounds and backward error estimates for it. */
+
+    dgerfs_(trans, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1],
+	     &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[
+	    1], &iwork[1], info);
+
+/*     Transform the solution matrix X to a solution of the original */
+/*     system; FERR is divided by the matching COLCND or ROWCND. */
+
+    if (notran) {
+	if (colequ) {
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1];
+/* L70: */
+		}
+/* L80: */
+	    }
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		ferr[j] /= colcnd;
+/* L90: */
+	    }
+	}
+    } else if (rowequ) {
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1];
+/* L100: */
+	    }
+/* L110: */
+	}
+	i__1 = *nrhs;
+	for (j = 1; j <= i__1; ++j) {
+	    ferr[j] /= rowcnd;
+/* L120: */
+	}
+    }
+
+    work[1] = rpvgrw;	/* overwrites the scratch use of WORK(1) by the calls above */
+
+/*     Set INFO = N+1 if the matrix is singular to working precision. */
+
+    if (*rcond < dlamch_("Epsilon")) {
+	*info = *n + 1;
+    }
+    return 0;
+
+/*     End of DGESVX */
+
+} /* dgesvx_ */
+
diff --git a/lapack-netlib/SRC/dgesvxx.c b/lapack-netlib/SRC/dgesvxx.c
new file mode 100644
index 000000000..27f99827a
--- /dev/null
+++ b/lapack-netlib/SRC/dgesvxx.c
@@ -0,0 +1,1211 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* C type used for Fortran INTEGER arguments in this file */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;	/* C type used for Fortran DOUBLE PRECISION arguments in this file */
+typedef struct { real r, i; } complex;	/* single-precision complex stored as a (real, imaginary) pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double-precision complex stored as a (real, imaginary) pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* read *z as a C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* read *z as a C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* reinterpret the struct's storage as a C99 complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* reinterpret the struct's storage as a C99 complex */
+#define pCf(z) (*_pCf(z))	/* assignable C99-complex lvalue aliasing *z */
+#define pCd(z) (*_pCd(z))	/* assignable C99-complex lvalue aliasing *z */
+typedef int logical;	/* holds TRUE_ or FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain `extern` unless already defined */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;	/* length type, e.g. the trailing string-length argument of xerbla_ */
+typedef int ftnint;
+/* NOTE(review): the descriptor structs below look like the libf2c runtime I/O control blocks; nothing in the visible part of this patch uses them - confirm before relying on the layout. */
+/* cilist: control list for external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: control list for internal read, write (iciunit is a character buffer, not a unit number) */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: argument list for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: argument list for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: argument list shared by rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: argument list for inquire; each char* result is paired with a length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* declared ftnint where the sibling length fields use ftnlen; both are typedefs of int here */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type declared in this header */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; (disabled: no longint type is declared in this header) */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable by name, address, dims and a type code */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of nvars Vardesc pointers */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* function-like macro: x is evaluated twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }	/* NOTE(review): calls the double-precision ccos, unlike the float variants used by its neighbours */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }	/* goes through the double-precision conj and converts back on assignment */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))	/* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))	/* rounds half away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))	/* |a| carrying the sign of b */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}	/* NOTE(review): B is dereferenced raw while A goes through Cd(); if B is a doublecomplex pointer this needs Cd(B) - confirm against callers */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }	/* copies at most min(C,D) chars and stops at a NUL in B; the rest of A is left untouched (no blank padding) */
+#define sig_die(s, kill) { exit(1); }	/* both arguments are ignored */
+#define s_stop(s, n) {exit(0);}	/* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}	/* NOTE(review): expands to a brace-wrapped expression with no semicolon; only valid where a braced initializer is allowed */
+#define myhuge(w) {HUGE_VAL}	/* NOTE(review): same brace-wrapped form as myceiling; w is ignored */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}	/* NOTE(review): same brace-wrapped form; always dispatches to the double-precision dmaxloc_ */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);	/* pointer to a function returning logical; C++ needs the explicit (...) for unspecified arguments */
+#else
+typedef logical (*L_fp)();	/* pointer to a function returning logical, arguments unspecified */
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Returns x**n by square-and-multiply; n < 0 gives (1/x)**|n| and n == 0 gives 1. */
+	float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n == 0) return pow;
+	if(n < 0) u = 0UL - u, x = 1/x;	/* |n| formed in unsigned arithmetic: `n = -n` overflows for the most negative n */
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) {
+	/* Returns x**n by square-and-multiply; n < 0 gives (1/x)**|n| and n == 0 gives 1. */
+	double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n == 0) return pow;
+	if(n < 0) u = 0UL - u, x = 1/x;	/* |n| formed in unsigned arithmetic: `n = -n` overflows for the most negative n */
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Returns the complex power x**n by square-and-multiply; n < 0 gives (1/x)**|n| and n == 0 gives 1. */
+	_Complex float pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n == 0) return pow;
+	if(n < 0) u = 0UL - u, x = 1/x;	/* |n| formed in unsigned arithmetic: `n = -n` overflows for the most negative n */
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Returns the complex power x**n by square-and-multiply; n < 0 gives (1/x)**|n| and n == 0 gives 1. */
+	_Complex double pow=1.0; unsigned long int u = (unsigned long int)n;
+	if(n == 0) return pow;
+	if(n < 0) u = 0UL - u, x = 1/x;	/* |n| formed in unsigned arithmetic: `n = -n` overflows for the most negative n */
+	for(;;) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) {
+	/* Integer power x**n; for n < 0 the result is 0 unless x is 1, -1 or 0. */
+	integer pow = 1; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) return 1;
+		/* (-1)**n depends only on the parity of n; testing it directly avoids `n = -n`, */
+		/* which overflows (undefined behaviour) when n is the most negative integer. */
+		if (x == -1) return (n % 2 != 0) ? -1 : 1;
+		return x == 0 ? 1/x : 0;	/* x == 0: the division by zero is kept exactly as before */
+	}
+	for(u = n; ; ) {
+		if(u & 01) pow *= x;
+		if(u >>= 1) x *= x;
+		else break;
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* Position (1-based, counted from s) of the first largest entry in w(s..e); n is not used. */
+	integer best = s, k; double top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* Position (1-based, counted from s) of the first largest entry in w(s..e); n is not used. */
+	integer best = s, k; float top = w[s-1];
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > top) { best = k; top = w[k-1]; }
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores in *z the conjugated dot product sum(conj(x_i) * y_i) over n strided elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment walks the vector backwards from its far end; */
+	/* starting at (1-n)*inc keeps every index inside the array instead of running off its front. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores in *z the conjugated dot product sum(conj(x_i) * y_i) over n strided elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment walks the vector backwards from its far end; */
+	/* starting at (1-n)*inc keeps every index inside the array instead of running off its front. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Stores in *z the unconjugated dot product sum(x_i * y_i) over n strided elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment walks the vector backwards from its far end; */
+	/* starting at (1-n)*inc keeps every index inside the array instead of running off its front. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Stores in *z the unconjugated dot product sum(x_i * y_i) over n strided elements. */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* As in the reference BLAS, a negative increment walks the vector backwards from its far end; */
+	/* starting at (1-n)*inc keeps every index inside the array instead of running off its front. */
+	integer ix = incx < 0 ? (1 - n) * incx : 0, iy = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i = 0; i < n; i++) {
+		zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* > \brief <b> DGESVXX computes the solution to a system of linear equations A * X = B for GE matrices</b> */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGESVXX + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvxx.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvxx.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvxx.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */
+/*                           EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, */
+/*                           BERR, N_ERR_BNDS, ERR_BNDS_NORM, */
+/*                           ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, */
+/*                           INFO ) */
+
+/*       CHARACTER          EQUED, FACT, TRANS */
+/*       INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */
+/*      $                   N_ERR_BNDS */
+/*       DOUBLE PRECISION   RCOND, RPVGRW */
+/*       INTEGER            IPIV( * ), IWORK( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */
+/*      $                   X( LDX , * ),WORK( * ) */
+/*       DOUBLE PRECISION   R( * ), C( * ), PARAMS( * ), BERR( * ), */
+/*      $                   ERR_BNDS_NORM( NRHS, * ), */
+/*      $                   ERR_BNDS_COMP( NRHS, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    DGESVXX uses the LU factorization to compute the solution to a */
+/* >    double precision system of linear equations  A * X = B,  where A is an */
+/* >    N-by-N matrix and X and B are N-by-NRHS matrices. */
+/* > */
+/* >    If requested, both normwise and maximum componentwise error bounds */
+/* >    are returned. DGESVXX will return a solution with a tiny */
+/* >    guaranteed error (O(eps) where eps is the working machine */
+/* >    precision) unless the matrix is very ill-conditioned, in which */
+/* >    case a warning is returned. Relevant condition numbers also are */
+/* >    calculated and returned. */
+/* > */
+/* >    DGESVXX accepts user-provided factorizations and equilibration */
+/* >    factors; see the definitions of the FACT and EQUED options. */
+/* >    Solving with refinement and using a factorization from a previous */
+/* >    DGESVXX call will also produce a solution with either O(eps) */
+/* >    errors or warnings, but we cannot make that claim for general */
+/* >    user-provided factorizations and equilibration factors if they */
+/* >    differ from what DGESVXX would itself produce. */
+/* > \endverbatim */
+
+/* > \par Description: */
+/*  ================= */
+/* > */
+/* > \verbatim */
+/* > */
+/* >    The following steps are performed: */
+/* > */
+/* >    1. If FACT = 'E', double precision scaling factors are computed to equilibrate */
+/* >    the system: */
+/* > */
+/* >      TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B */
+/* >      TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */
+/* >      TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */
+/* > */
+/* >    Whether or not the system will be equilibrated depends on the */
+/* >    scaling of the matrix A, but if equilibration is used, A is */
+/* >    overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */
+/* >    or diag(C)*B (if TRANS = 'T' or 'C'). */
+/* > */
+/* >    2. If FACT = 'N' or 'E', the LU decomposition is used to factor */
+/* >    the matrix A (after equilibration if FACT = 'E') as */
+/* > */
+/* >      A = P * L * U, */
+/* > */
+/* >    where P is a permutation matrix, L is a unit lower triangular */
+/* >    matrix, and U is upper triangular. */
+/* > */
+/* >    3. If some U(i,i)=0, so that U is exactly singular, then the */
+/* >    routine returns with INFO = i. Otherwise, the factored form of A */
+/* >    is used to estimate the condition number of the matrix A (see */
+/* >    argument RCOND). If the reciprocal of the condition number is less */
+/* >    than machine precision, the routine still goes on to solve for X */
+/* >    and compute error bounds as described below. */
+/* > */
+/* >    4. The system of equations is solved for X using the factored form */
+/* >    of A. */
+/* > */
+/* >    5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */
+/* >    the routine will use iterative refinement to try to get a small */
+/* >    error and error bounds.  Refinement calculates the residual to at */
+/* >    least twice the working precision. */
+/* > */
+/* >    6. If equilibration was used, the matrix X is premultiplied by */
+/* >    diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */
+/* >    that it solves the original system before equilibration. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \verbatim */
+/* >     Some optional parameters are bundled in the PARAMS array.  These */
+/* >     settings determine how refinement is performed, but often the */
+/* >     defaults are acceptable.  If the defaults are acceptable, users */
+/* >     can pass NPARAMS = 0 which prevents the source code from accessing */
+/* >     the PARAMS argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] FACT */
+/* > \verbatim */
+/* >          FACT is CHARACTER*1 */
+/* >     Specifies whether or not the factored form of the matrix A is */
+/* >     supplied on entry, and if not, whether the matrix A should be */
+/* >     equilibrated before it is factored. */
+/* >       = 'F':  On entry, AF and IPIV contain the factored form of A. */
+/* >               If EQUED is not 'N', the matrix A has been */
+/* >               equilibrated with scaling factors given by R and C. */
+/* >               A, AF, and IPIV are not modified. */
+/* >       = 'N':  The matrix A will be copied to AF and factored. */
+/* >       = 'E':  The matrix A will be equilibrated if necessary, then */
+/* >               copied to AF and factored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >     Specifies the form of the system of equations: */
+/* >       = 'N':  A * X = B     (No transpose) */
+/* >       = 'T':  A**T * X = B  (Transpose) */
+/* >       = 'C':  A**H * X = B  (Conjugate Transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >     The number of linear equations, i.e., the order of the */
+/* >     matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >     The number of right hand sides, i.e., the number of columns */
+/* >     of the matrices B and X.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >     On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is */
+/* >     not 'N', then A must have been equilibrated by the scaling */
+/* >     factors in R and/or C.  A is not modified if FACT = 'F' or */
+/* >     'N', or if FACT = 'E' and EQUED = 'N' on exit. */
+/* > */
+/* >     On exit, if EQUED .ne. 'N', A is scaled as follows: */
+/* >     EQUED = 'R':  A := diag(R) * A */
+/* >     EQUED = 'C':  A := A * diag(C) */
+/* >     EQUED = 'B':  A := diag(R) * A * diag(C). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >     The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] AF */
+/* > \verbatim */
+/* >          AF is DOUBLE PRECISION array, dimension (LDAF,N) */
+/* >     If FACT = 'F', then AF is an input argument and on entry */
+/* >     contains the factors L and U from the factorization */
+/* >     A = P*L*U as computed by DGETRF.  If EQUED .ne. 'N', then */
+/* >     AF is the factored form of the equilibrated matrix A. */
+/* > */
+/* >     If FACT = 'N', then AF is an output argument and on exit */
+/* >     returns the factors L and U from the factorization A = P*L*U */
+/* >     of the original matrix A. */
+/* > */
+/* >     If FACT = 'E', then AF is an output argument and on exit */
+/* >     returns the factors L and U from the factorization A = P*L*U */
+/* >     of the equilibrated matrix A (see the description of A for */
+/* >     the form of the equilibrated matrix). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDAF */
+/* > \verbatim */
+/* >          LDAF is INTEGER */
+/* >     The leading dimension of the array AF.  LDAF >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >     If FACT = 'F', then IPIV is an input argument and on entry */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     as computed by DGETRF; row i of the matrix was interchanged */
+/* >     with row IPIV(i). */
+/* > */
+/* >     If FACT = 'N', then IPIV is an output argument and on exit */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     of the original matrix A. */
+/* > */
+/* >     If FACT = 'E', then IPIV is an output argument and on exit */
+/* >     contains the pivot indices from the factorization A = P*L*U */
+/* >     of the equilibrated matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] EQUED */
+/* > \verbatim */
+/* >          EQUED is CHARACTER*1 */
+/* >     Specifies the form of equilibration that was done. */
+/* >       = 'N':  No equilibration (always true if FACT = 'N'). */
+/* >       = 'R':  Row equilibration, i.e., A has been premultiplied by */
+/* >               diag(R). */
+/* >       = 'C':  Column equilibration, i.e., A has been postmultiplied */
+/* >               by diag(C). */
+/* >       = 'B':  Both row and column equilibration, i.e., A has been */
+/* >               replaced by diag(R) * A * diag(C). */
+/* >     EQUED is an input argument if FACT = 'F'; otherwise, it is an */
+/* >     output argument. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] R */
+/* > \verbatim */
+/* >          R is DOUBLE PRECISION array, dimension (N) */
+/* >     The row scale factors for A.  If EQUED = 'R' or 'B', A is */
+/* >     multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */
+/* >     is not accessed.  R is an input argument if FACT = 'F'; */
+/* >     otherwise, R is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'R' or 'B', each element of R must be positive. */
+/* >     If R is output, each element of R is a power of the radix. */
+/* >     If R is input, each element of R should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] C */
+/* > \verbatim */
+/* >          C is DOUBLE PRECISION array, dimension (N) */
+/* >     The column scale factors for A.  If EQUED = 'C' or 'B', A is */
+/* >     multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */
+/* >     is not accessed.  C is an input argument if FACT = 'F'; */
+/* >     otherwise, C is an output argument.  If FACT = 'F' and */
+/* >     EQUED = 'C' or 'B', each element of C must be positive. */
+/* >     If C is output, each element of C is a power of the radix. */
+/* >     If C is input, each element of C should be a power of the radix */
+/* >     to ensure a reliable solution and error estimates. Scaling by */
+/* >     powers of the radix does not cause rounding errors unless the */
+/* >     result underflows or overflows. Rounding errors during scaling */
+/* >     lead to refining with a matrix that is not equivalent to the */
+/* >     input matrix, producing error estimates that may not be */
+/* >     reliable. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >     On entry, the N-by-NRHS right hand side matrix B. */
+/* >     On exit, */
+/* >     if EQUED = 'N', B is not modified; */
+/* >     if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */
+/* >        diag(R)*B; */
+/* >     if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */
+/* >        overwritten by diag(C)*B. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >     The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] X */
+/* > \verbatim */
+/* >          X is DOUBLE PRECISION array, dimension (LDX,NRHS) */
+/* >     If INFO = 0, the N-by-NRHS solution matrix X to the original */
+/* >     system of equations.  Note that A and B are modified on exit */
+/* >     if EQUED .ne. 'N', and the solution to the equilibrated system is */
+/* >     inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */
+/* >     inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDX */
+/* > \verbatim */
+/* >          LDX is INTEGER */
+/* >     The leading dimension of the array X.  LDX >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RCOND */
+/* > \verbatim */
+/* >          RCOND is DOUBLE PRECISION */
+/* >     Reciprocal scaled condition number.  This is an estimate of the */
+/* >     reciprocal Skeel condition number of the matrix A after */
+/* >     equilibration (if done).  If this is less than the machine */
+/* >     precision (in particular, if it is zero), the matrix is singular */
+/* >     to working precision.  Note that the error may still be small even */
+/* >     if this number is very small and the matrix appears ill- */
+/* >     conditioned. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] RPVGRW */
+/* > \verbatim */
+/* >          RPVGRW is DOUBLE PRECISION */
+/* >     Reciprocal pivot growth.  On exit, this contains the reciprocal */
+/* >     pivot growth factor norm(A)/norm(U). The "max absolute element" */
+/* >     norm is used.  If this is much less than 1, then the stability of */
+/* >     the LU factorization of the (equilibrated) matrix A could be poor. */
+/* >     This also means that the solution X, estimated condition numbers, */
+/* >     and error bounds could be unreliable. If factorization fails with */
+/* >     0<INFO<=N, then this contains the reciprocal pivot growth factor */
+/* >     for the leading INFO columns of A.  In DGESVX, this quantity is */
+/* >     returned in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] BERR */
+/* > \verbatim */
+/* >          BERR is DOUBLE PRECISION array, dimension (NRHS) */
+/* >     Componentwise relative backward error.  This is the */
+/* >     componentwise relative backward error of each solution vector X(j) */
+/* >     (i.e., the smallest relative change in any element of A or B that */
+/* >     makes X(j) an exact solution). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N_ERR_BNDS */
+/* > \verbatim */
+/* >          N_ERR_BNDS is INTEGER */
+/* >     Number of error bounds to return for each right hand side */
+/* >     and each type (normwise or componentwise).  See ERR_BNDS_NORM and */
+/* >     ERR_BNDS_COMP below. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_NORM */
+/* > \verbatim */
+/* >          ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     normwise relative error, which is defined as follows: */
+/* > */
+/* >     Normwise relative error in the ith solution vector: */
+/* >             max_j (abs(XTRUE(j,i) - X(j,i))) */
+/* >            ------------------------------ */
+/* >                  max_j abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the type of error information as described */
+/* >     below. There currently are up to three pieces of information */
+/* >     returned. */
+/* > */
+/* >     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_NORM(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated normwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*A, where S scales each row by a power of the */
+/* >              radix so all absolute row sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] ERR_BNDS_COMP */
+/* > \verbatim */
+/* >          ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) */
+/* >     For each right-hand side, this array contains information about */
+/* >     various error bounds and condition numbers corresponding to the */
+/* >     componentwise relative error, which is defined as follows: */
+/* > */
+/* >     Componentwise relative error in the ith solution vector: */
+/* >                    abs(XTRUE(j,i) - X(j,i)) */
+/* >             max_j ---------------------- */
+/* >                         abs(X(j,i)) */
+/* > */
+/* >     The array is indexed by the right-hand side i (on which the */
+/* >     componentwise relative error depends), and the type of error */
+/* >     information as described below. There currently are up to three */
+/* >     pieces of information returned for each right-hand side. If */
+/* >     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
+/* >     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS < 3, then at most */
+/* >     the first (:,N_ERR_BNDS) entries are returned. */
+/* > */
+/* >     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
+/* >     right-hand side. */
+/* > */
+/* >     The second index in ERR_BNDS_COMP(:,err) contains the following */
+/* >     three fields: */
+/* >     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
+/* >              reciprocal condition number is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). */
+/* > */
+/* >     err = 2 "Guaranteed" error bound: The estimated forward error, */
+/* >              almost certainly within a factor of 10 of the true error */
+/* >              so long as the next entry is greater than the threshold */
+/* >              sqrt(n) * dlamch('Epsilon'). This error bound should only */
+/* >              be trusted if the previous boolean is true. */
+/* > */
+/* >     err = 3  Reciprocal condition number: Estimated componentwise */
+/* >              reciprocal condition number.  Compared with the threshold */
+/* >              sqrt(n) * dlamch('Epsilon') to determine if the error */
+/* >              estimate is "guaranteed". These reciprocal condition */
+/* >              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
+/* >              appropriately scaled matrix Z. */
+/* >              Let Z = S*(A*diag(x)), where x is the solution for the */
+/* >              current right-hand side and S scales each row of */
+/* >              A*diag(x) by a power of the radix so all absolute row */
+/* >              sums of Z are approximately 1. */
+/* > */
+/* >     See Lapack Working Note 165 for further details and extra */
+/* >     cautions. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NPARAMS */
+/* > \verbatim */
+/* >          NPARAMS is INTEGER */
+/* >     Specifies the number of parameters set in PARAMS.  If <= 0, the */
+/* >     PARAMS array is never referenced and default values are used. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] PARAMS */
+/* > \verbatim */
+/* >          PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) */
+/* >     Specifies algorithm parameters.  If an entry is < 0.0, then */
+/* >     that entry will be filled with default value used for that */
+/* >     parameter.  Only positions up to NPARAMS are accessed; defaults */
+/* >     are used for higher-numbered parameters. */
+/* > */
+/* >       PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */
+/* >            refinement or not. */
+/* >         Default: 1.0D+0 */
+/* >            = 0.0:  No refinement is performed, and no error bounds are */
+/* >                    computed. */
+/* >            = 1.0:  Use the extra-precise refinement algorithm. */
+/* >              (other values are reserved for future use) */
+/* > */
+/* >       PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */
+/* >            computations allowed for refinement. */
+/* >         Default: 10 */
+/* >         Aggressive: Set to 100 to permit convergence using approximate */
+/* >                     factorizations or factorizations other than LU. If */
+/* >                     the factorization uses a technique other than */
+/* >                     Gaussian elimination, the guarantees in */
+/* >                     err_bnds_norm and err_bnds_comp may no longer be */
+/* >                     trustworthy. */
+/* > */
+/* >       PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */
+/* >            will attempt to find a solution with small componentwise */
+/* >            relative error in the double-precision algorithm.  Positive */
+/* >            is true, 0.0 is false. */
+/* >         Default: 1.0 (attempt componentwise convergence) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (4*N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IWORK */
+/* > \verbatim */
+/* >          IWORK is INTEGER array, dimension (N) */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >       = 0:  Successful exit. The solution to every right-hand side is */
+/* >         guaranteed. */
+/* >       < 0:  If INFO = -i, the i-th argument had an illegal value */
+/* >       > 0 and <= N:  U(INFO,INFO) is exactly zero.  The factorization */
+/* >         has been completed, but the factor U is exactly singular, so */
+/* >         the solution and error bounds could not be computed. RCOND = 0 */
+/* >         is returned. */
+/* >       = N+J: The solution corresponding to the Jth right-hand side is */
+/* >         not guaranteed. The solutions corresponding to other right- */
+/* >         hand sides K with K > J may not be guaranteed as well, but */
+/* >         only the first such right-hand side is reported. If a small */
+/* >         componentwise error is not requested (PARAMS(3) = 0.0) then */
+/* >         the Jth right-hand side is the first with a normwise error */
+/* >         bound that is not guaranteed (the smallest J such */
+/* >         that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */
+/* >         the Jth right-hand side is the first with either a normwise or */
+/* >         componentwise error bound that is not guaranteed (the smallest */
+/* >         J such that either ERR_BNDS_NORM(J,1) = 0.0 or */
+/* >         ERR_BNDS_COMP(J,1) = 0.0). See the definition of */
+/* >         ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */
+/* >         about all of the right-hand sides check ERR_BNDS_NORM or */
+/* >         ERR_BNDS_COMP. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date April 2012 */
+
+/* > \ingroup doubleGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgesvxx_(char *fact, char *trans, integer *n, integer *
+	nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, 
+	integer *ipiv, char *equed, doublereal *r__, doublereal *c__, 
+	doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *
+	rcond, doublereal *rpvgrw, doublereal *berr, integer *n_err_bnds__, 
+	doublereal *err_bnds_norm__, doublereal *err_bnds_comp__, integer *
+	nparams, doublereal *params, doublereal *work, integer *iwork, 
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
+	    x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, 
+	    err_bnds_comp_dim1, err_bnds_comp_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    doublereal amax;
+    extern doublereal dla_gerpvgrw__(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *);
+    integer j;
+    extern logical lsame_(char *, char *);
+    doublereal rcmin, rcmax;
+    logical equil;
+    extern doublereal dlamch_(char *);
+    extern /* Subroutine */ int dlaqge_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, char *);
+    doublereal colcnd;
+    logical nofact;
+    extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, 
+	    integer *, integer *, integer *), dlacpy_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *, ftnlen);
+    doublereal bignum;
+    integer infequ;
+    logical colequ;
+    extern /* Subroutine */ int dgetrs_(char *, integer *, integer *, 
+	    doublereal *, integer *, integer *, doublereal *, integer *, 
+	    integer *);
+    doublereal rowcnd;
+    logical notran;
+    doublereal smlnum;
+    logical rowequ;
+    extern /* Subroutine */ int dlascl2_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *), dgeequb_(integer *, integer *, 
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, doublereal *, integer *), dgerfsx_(char *, char *, 
+	    integer *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    doublereal *, integer *, integer *);
+
+
+/*  -- LAPACK driver routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     April 2012 */
+
+
+/*  ===================================================================== */
+/*  Expert driver: optionally equilibrates A, LU-factors it, solves for X, */
+/*  then refines X and computes error bounds by calling DGERFSX. */
+    /* Parameter adjustments: shift pointers so arrays can be indexed from 1 */
+    err_bnds_comp_dim1 = *nrhs;
+    err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1;
+    err_bnds_comp__ -= err_bnds_comp_offset;
+    err_bnds_norm_dim1 = *nrhs;
+    err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1;
+    err_bnds_norm__ -= err_bnds_norm_offset;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    af_dim1 = *ldaf;
+    af_offset = 1 + af_dim1 * 1;
+    af -= af_offset;
+    --ipiv;
+    --r__;
+    --c__;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1 * 1;
+    x -= x_offset;
+    --berr;
+    --params;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    nofact = lsame_(fact, "N");
+    equil = lsame_(fact, "E");
+    notran = lsame_(trans, "N");
+    smlnum = dlamch_("Safe minimum");
+    bignum = 1. / smlnum;
+    if (nofact || equil) {
+	*(unsigned char *)equed = 'N'; /* EQUED is an output here; start from "not equilibrated" */
+	rowequ = FALSE_;
+	colequ = FALSE_;
+    } else {
+	rowequ = lsame_(equed, "R") || lsame_(equed, 
+		"B");
+	colequ = lsame_(equed, "C") || lsame_(equed, 
+		"B");
+    }
+
+/*     Default is failure.  If an input parameter is wrong or */
+/*     factorization fails, make everything look horrible.  Only the */
+/*     pivot growth is set here, the rest is initialized in DGERFSX. */
+
+    *rpvgrw = 0.;
+
+/*     Test the input parameters.  PARAMS is not tested until DGERFSX. */
+/*     A failed test stores minus the position of the offending argument. */
+    if (! nofact && ! equil && ! lsame_(fact, "F")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && ! 
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -6;
+    } else if (*ldaf < f2cmax(1,*n)) {
+	*info = -8;
+    } else if (lsame_(fact, "F") && ! (rowequ || colequ 
+	    || lsame_(equed, "N"))) {
+	*info = -10;
+    } else {
+	if (rowequ) { /* caller-supplied row scale factors: find their range */
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = r__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = r__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L10: */
+	    }
+	    if (rcmin <= 0.) { /* every row scale factor must be positive */
+		*info = -11;
+	    } else if (*n > 0) {
+		rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); /* smallest/largest, clamped to [smlnum, bignum] */
+	    } else {
+		rowcnd = 1.;
+	    }
+	}
+	if (colequ && *info == 0) { /* same range check for the column scale factors */
+	    rcmin = bignum;
+	    rcmax = 0.;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+		d__1 = rcmin, d__2 = c__[j];
+		rcmin = f2cmin(d__1,d__2);
+/* Computing MAX */
+		d__1 = rcmax, d__2 = c__[j];
+		rcmax = f2cmax(d__1,d__2);
+/* L20: */
+	    }
+	    if (rcmin <= 0.) { /* every column scale factor must be positive */
+		*info = -12;
+	    } else if (*n > 0) {
+		colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); /* smallest/largest, clamped to [smlnum, bignum] */
+	    } else {
+		colcnd = 1.;
+	    }
+	}
+	if (*info == 0) {
+	    if (*ldb < f2cmax(1,*n)) {
+		*info = -14;
+	    } else if (*ldx < f2cmax(1,*n)) {
+		*info = -16;
+	    }
+	}
+    }
+
+    if (*info != 0) { /* report the illegal argument through XERBLA and return */
+	i__1 = -(*info);
+	xerbla_("DGESVXX", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+    if (equil) {
+
+/*     Compute row and column scalings to equilibrate the matrix A. */
+
+	dgeequb_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, 
+		&amax, &infequ);
+	if (infequ == 0) {
+
+/*     Equilibrate the matrix. */
+
+	    dlaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &
+		    colcnd, &amax, equed);
+	    rowequ = lsame_(equed, "R") || lsame_(equed,
+		     "B");
+	    colequ = lsame_(equed, "C") || lsame_(equed,
+		     "B");
+	}
+
+/*     If the scaling factors are not applied, set them to 1.0. */
+
+	if (! rowequ) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		r__[j] = 1.;
+	    }
+	}
+	if (! colequ) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		c__[j] = 1.;
+	    }
+	}
+    }
+
+/*     Scale the right-hand side (by R if TRANS = 'N', otherwise by C). */
+
+    if (notran) {
+	if (rowequ) {
+	    dlascl2_(n, nrhs, &r__[1], &b[b_offset], ldb);
+	}
+    } else {
+	if (colequ) {
+	    dlascl2_(n, nrhs, &c__[1], &b[b_offset], ldb);
+	}
+    }
+
+    if (nofact || equil) {
+
+/*        Compute the LU factorization of A (on a copy held in AF). */
+
+	dlacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf);
+	dgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info);
+
+/*        Return early if DGETRF reported INFO > 0. */
+
+	if (*info > 0) {
+
+/*           Pivot in column INFO is exactly 0 */
+/*           Compute the reciprocal pivot growth factor of the */
+/*           leading rank-deficient INFO columns of A. */
+
+	    *rpvgrw = dla_gerpvgrw__(n, info, &a[a_offset], lda, &af[
+		    af_offset], ldaf);
+	    return 0;
+	}
+    }
+
+/*     Compute the reciprocal pivot growth factor RPVGRW. */
+
+    *rpvgrw = dla_gerpvgrw__(n, n, &a[a_offset], lda, &af[af_offset], ldaf);
+
+/*     Compute the solution matrix X (B is copied into X, then solved in place). */
+
+    dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
+    dgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx,
+	     info);
+
+/*     Use iterative refinement to improve the computed solution and */
+/*     compute error bounds and backward error estimates for it. */
+
+    dgerfsx_(trans, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &
+	    ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, 
+	    rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[
+	    err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], 
+	    nparams, &params[1], &work[1], &iwork[1], info);
+
+/*     Scale solutions (by C if TRANS = 'N', otherwise by R). */
+
+    if (colequ && notran) {
+	dlascl2_(n, nrhs, &c__[1], &x[x_offset], ldx);
+    } else if (rowequ && ! notran) {
+	dlascl2_(n, nrhs, &r__[1], &x[x_offset], ldx);
+    }
+
+    return 0;
+
+/*     End of DGESVXX */
+} /* dgesvxx_ */
+
diff --git a/lapack-netlib/SRC/dgetc2.c b/lapack-netlib/SRC/dgetc2.c
new file mode 100644
index 000000000..e6abe6eb8
--- /dev/null
+++ b/lapack-netlib/SRC/dgetc2.c
@@ -0,0 +1,646 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* integer type used throughout the translated code */
+typedef unsigned int uinteger; /* unsigned counterpart of integer */
+typedef char *address;
+typedef short int shortint;
+typedef float real; /* single-precision floating point */
+typedef double doublereal; /* double-precision floating point */
+typedef struct { real r, i; } complex; /* single-precision pair: r and i members */
+typedef struct { doublereal r, i; } doublecomplex; /* double-precision pair: r and i members */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C99 complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a C99 complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage viewed as _Complex float */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same storage viewed as _Complex double */
+#define pCf(z) (*_pCf(z)) /* assignable _Complex float view of *z */
+#define pCd(z) (*_pCd(z)) /* assignable _Complex double view of *z */
+typedef int logical; /* truth value; see TRUE_ / FALSE_ */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: integer typedefs and argument records, one record per kind of I/O statement */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: record for external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: record for internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: record for open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: record for close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: record for rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: record for inquire; most char* members are paired with a length member */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points; one member per scalar type */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: one entry of Namelist.vars */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a name plus nvars pointers to Vardesc entries */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x)) /* macro: evaluates x twice */
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) /* macro: the selected argument is evaluated twice */
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) /* macro: the selected argument is evaluated twice */
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } /* NOTE(review): double-precision ccos applied to a float complex */
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } /* NOTE(review): double-precision conj applied to a float complex */
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) /* positive difference: a-b if a>b, else 0 */
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z))) /* NOTE(review): double-precision cimag applied to a float complex */
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) /* truncates toward zero */
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* log10(x) computed as log10(e) * ln(x) */
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) /* log10(x) computed as log10(e) * ln(x) */
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) /* nearest integer, halves rounded away from zero */
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) /* |a| if b >= 0, else -|a| */
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} /* NOTE(review): B is passed as *(B), not Cd(B) like A - confirm the intended type of B */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) /* compares only the first min(c,d) characters */
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } /* copies at most min(C,D) chars, stops at a NUL in B; A is neither padded nor terminated */
+#define sig_die(s, kill) { exit(1); } /* both arguments are ignored */
+#define s_stop(s, n) {exit(0);} /* both arguments are ignored */
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break; /* expands to a complete break statement */
+#define mycycle() continue; /* expands to a complete continue statement */
+#define myceiling(w) {ceil(w)} /* NOTE(review): braced expression with no semicolon - not usable as a value or a statement as written */
+#define myhuge(w) {HUGE_VAL} /* NOTE(review): braced expression with no semicolon - not usable as a value or a statement as written */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} /* NOTE(review): braced call with no semicolon; the result of dmaxloc_ cannot be used */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...); /* C++: pointer to a function returning logical, variadic arguments */
+#else
+typedef logical (*L_fp)(); /* C: pointer to a function returning logical, no prototype */
+#endif /* __cplusplus */
+
+/* Return x raised to the integer power n by binary exponentiation; */
+/* a negative n is evaluated as (1/x)**|n|, and n == 0 yields 1. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0; unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }	/* |n| formed in unsigned arithmetic: no overflow at INT_MIN */
+	while (u != 0) {
+		if (u & 1UL) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Return x raised to the integer power n by binary exponentiation; */
+/* a negative n is evaluated as (1/x)**|n|, and n == 0 yields 1. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0; unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }	/* |n| formed in unsigned arithmetic: no overflow at INT_MIN */
+	while (u != 0) {
+		if (u & 1UL) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Return complex x raised to the integer power n by binary exponentiation; */
+/* a negative n is evaluated as (1/x)**|n|, and n == 0 yields 1. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0; unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }	/* |n| formed in unsigned arithmetic: no overflow at INT_MIN */
+	while (u != 0) {
+		if (u & 1UL) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Return complex x raised to the integer power n by binary exponentiation; */
+/* a negative n is evaluated as (1/x)**|n|, and n == 0 yields 1. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0; unsigned long int u = (unsigned long int)n;
+	if (n < 0) { u = 0UL - u; x = 1/x; }	/* |n| formed in unsigned arithmetic: no overflow at INT_MIN */
+	while (u != 0) {
+		if (u & 1UL) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;	/* square only while exponent bits remain */
+	}
+	return result;
+}
+/* Integer power x**n.  For n < 0 the result is 1 when x == 1, (-1)**|n| when */
+/* x == -1, and 0 otherwise, except that x == 0 deliberately divides by zero. */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int u = (unsigned long int)n;
+	if (n < 0) {
+		if (x == 1) return 1;
+		if (x != -1) return x == 0 ? 1/x : 0;
+		u = 0UL - u;	/* |n| formed in unsigned arithmetic: no overflow at INT_MIN */
+	}
+	while (u != 0) {	/* binary exponentiation over the bits of |n| */
+		if (u & 1UL) result *= x;
+		u >>= 1;
+		if (u != 0) x *= x;
+	}
+	return result;
+}
+/* (end of pow_ii) */
+/* Position (counted from 1 at index s) of the first largest entry among w(s..e), 1-based; n is unused. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Position (counted from 1 at index s) of the first largest entry among w(s..e), 1-based; n is unused. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	integer k, best = s;
+	for (k = s + 1; k <= e; k++)
+		if (w[k-1] > w[best-1]) best = k;	/* strict '>' keeps the earliest maximum */
+	return best - s + 1;
+}
+/* Store in *z the sum over n elements of conj(x(i)) * y(i), with strides *incx_ and *incy_. */
+/* A negative stride starts at the far end of its vector, as reference BLAS does; n <= 0 gives 0. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, k;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* 0-based start offset into x */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;	/* 0-based start offset into y */
+	_Complex float acc = 0.0;
+	for (k = 0; k < n; k++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Store in *z the sum over n elements of conj(x(i)) * y(i), with strides *incx_ and *incy_. */
+/* A negative stride starts at the far end of its vector, as reference BLAS does; n <= 0 gives 0. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, k;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* 0-based start offset into x */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;	/* 0-based start offset into y */
+	_Complex double acc = 0.0;
+	for (k = 0; k < n; k++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Store in *z the unconjugated sum over n elements of x(i) * y(i), with strides *incx_ and *incy_. */
+/* A negative stride starts at the far end of its vector, as reference BLAS does; n <= 0 gives 0. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, k;
+	integer ix = incx < 0 ? (1 - n) * incx : 0;	/* 0-based start offset into x */
+	integer iy = incy < 0 ? (1 - n) * incy : 0;	/* 0-based start offset into y */
+	_Complex float acc = 0.0;
+	for (k = 0; k < n; k++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx;
+		iy += incy;
+	}
+	pCf(z) = acc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of x(i) * y(i), no conjugation, double precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i; /* scalar arguments arrive by reference */
+	_Complex double zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* contiguous fast path */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		/* Reference BLAS convention: with a negative increment the walk starts at (1-n)*inc, so no element before the array is touched. */
+		integer ix = incx < 0 ? (1-n)*incx : 0, iy = incy < 0 ? (1-n)*incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc; /* store through the struct viewed as a native complex */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublereal c_b10 = -1.;
+
+/* > \brief \b DGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGETC2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetc2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetc2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetc2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, N */
+/*       INTEGER            IPIV( * ), JPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGETC2 computes an LU factorization with complete pivoting of the */
+/* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */
+/* > where P and Q are permutation matrices, L is lower triangular with */
+/* > unit diagonal elements and U is upper triangular. */
+/* > */
+/* > This is the Level 2 BLAS algorithm. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A. N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA, N) */
+/* >          On entry, the n-by-n matrix A to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U*Q; the unit diagonal elements of L are not stored. */
+/* >          If U(k, k) appears to be less than SMIN, U(k, k) is given the */
+/* >          value of SMIN, i.e., giving a nonsingular perturbed system. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension(N). */
+/* >          The pivot indices; for 1 <= i <= N, row i of the */
+/* >          matrix has been interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] JPIV */
+/* > \verbatim */
+/* >          JPIV is INTEGER array, dimension(N). */
+/* >          The pivot indices; for 1 <= j <= N, column j of the */
+/* >          matrix has been interchanged with column JPIV(j). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >           = 0: successful exit */
+/* >           > 0: if INFO = k, U(k, k) is likely to produce overflow if */
+/* >                we try to solve for x in Ax = b. So U is perturbed to */
+/* >                avoid the overflow. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleGEauxiliary */
+
+/* > \par Contributors: */
+/*  ================== */
+/* > */
+/* >     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
+/* >     Umea University, S-901 87 Umea, Sweden. */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgetc2_(integer *n, doublereal *a, integer *lda, integer 
+	*ipiv, integer *jpiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    doublereal smin, xmax; /* smin: pivot floor, fixed during the first elimination step */
+    integer i__, j;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *), dlabad_(doublereal *, doublereal *);
+    extern doublereal dlamch_(char *);
+    integer ip, jp;
+    doublereal bignum, smlnum, eps;
+    integer ipv, jpv; /* row and column of the pivot chosen for the current step */
+
+/*  In-place LU factorization of the n-by-n matrix A with complete (row and column) pivoting. */
+/*  -- LAPACK auxiliary routine (version 3.8.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset; /* after the shift a[i + j * a_dim1] addresses the 1-based entry A(i,j) */
+    --ipiv; /* ipiv and jpiv are likewise indexed from 1 below */
+    --jpiv;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Set constants to control overflow */
+
+    eps = dlamch_("P");
+    smlnum = dlamch_("S") / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Handle the case N=1 by itself */
+
+    if (*n == 1) {
+	ipiv[1] = 1;
+	jpiv[1] = 1;
+	if ((d__1 = a[a_dim1 + 1], abs(d__1)) < smlnum) {
+	    *info = 1;
+	    a[a_dim1 + 1] = smlnum;
+	}
+	return 0;
+    }
+
+/*     Factorize A using complete pivoting. */
+/*     Set pivots less than SMIN to SMIN. */
+
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Find max element in matrix A */
+	ipv = i__; jpv = i__; /* default to the diagonal: the search below selects nothing when every remaining entry is NaN */
+	xmax = 0.;
+	i__2 = *n;
+	for (ip = i__; ip <= i__2; ++ip) {
+	    i__3 = *n;
+	    for (jp = i__; jp <= i__3; ++jp) {
+		if ((d__1 = a[ip + jp * a_dim1], abs(d__1)) >= xmax) { /* '>=' also accepts zeros; a NaN entry never passes */
+		    xmax = (d__1 = a[ip + jp * a_dim1], abs(d__1));
+		    ipv = ip;
+		    jpv = jp;
+		}
+/* L10: */
+	    }
+/* L20: */
+	}
+	if (i__ == 1) {
+/* Computing MAX */
+	    d__1 = eps * xmax;
+	    smin = f2cmax(d__1,smlnum);
+	}
+
+/*        Swap rows */
+
+	if (ipv != i__) {
+	    dswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda);
+	}
+	ipiv[i__] = ipv;
+
+/*        Swap columns */
+
+	if (jpv != i__) {
+	    dswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], &
+		    c__1);
+	}
+	jpiv[i__] = jpv;
+
+/*        Check for singularity */
+
+	if ((d__1 = a[i__ + i__ * a_dim1], abs(d__1)) < smin) {
+	    *info = i__;
+	    a[i__ + i__ * a_dim1] = smin;
+	}
+	i__2 = *n;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1]; /* multipliers of column i__, stored below the diagonal */
+/* L30: */
+	}
+	i__2 = *n - i__;
+	i__3 = *n - i__;
+	dger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ 
+		+ (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], 
+		lda);
+/* L40: */
+    }
+
+    if ((d__1 = a[*n + *n * a_dim1], abs(d__1)) < smin) {
+	*info = *n;
+	a[*n + *n * a_dim1] = smin;
+    }
+
+/*     Set last pivots to N */
+
+    ipiv[*n] = *n;
+    jpiv[*n] = *n;
+
+    return 0;
+
+/*     End of DGETC2 */
+
+} /* dgetc2_ */
+
diff --git a/lapack-netlib/SRC/dgetf2.c b/lapack-netlib/SRC/dgetf2.c
new file mode 100644
index 000000000..066562932
--- /dev/null
+++ b/lapack-netlib/SRC/dgetf2.c
@@ -0,0 +1,620 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* scalar type aliases used by all translated code in this file */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex; /* real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a native float complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a native double complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage viewed as a native complex; assumes the struct and _Complex float share a layout - TODO confirm */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* double precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z)) /* lvalue form: pCf(z) = value stores a native complex into *z */
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) /* double ** double; both operands are passed by pointer */
+#define pow_si(B,E) spow_ui(*(B),*(E)) /* float base, integer exponent */
+#define pow_ri(B,E) spow_ui(*(B),*(E)) /* float base, integer exponent */
+#define pow_di(B,E) dpow_ui(*(B),*(E)) /* double base, integer exponent */
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} /* *p = double complex base ** integer exponent */
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} /* *p = float complex base ** integer exponent */
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* *R = A ** B; B is converted with Cd() like A, since cpow() takes native complex values and not the struct */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break; /* expands to a bare break statement */
+#define mycycle() continue; /* expands to a bare continue statement */
+#define myceiling(w) (ceil(w)) /* parenthesised, not braced, so it is usable inside any expression */
+#define myhuge(w) (HUGE_VAL) /* the argument is ignored; HUGE_VAL comes from <math.h> */
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n)) /* value of dmaxloc_; s and e are passed by pointer, n is forwarded unchanged */
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) { /* x raised to the integer power n, by repeated squaring */
+	float pow=1.0; unsigned long int u; /* pow: result so far (1 when n == 0); u: remaining exponent bits */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the reciprocal base with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: multiply this power of x in */
+			if(u >>= 1) x *= x; /* more bits left: square the base for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static double dpow_ui(double x, integer n) { /* x raised to the integer power n, by repeated squaring */
+	double pow=1.0; unsigned long int u; /* pow: result so far (1 when n == 0); u: remaining exponent bits */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the reciprocal base with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: multiply this power of x in */
+			if(u >>= 1) x *= x; /* more bits left: square the base for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) { /* complex x raised to the integer power n, by repeated squaring */
+	_Complex float pow=1.0; unsigned long int u; /* pow: result so far (1 when n == 0); u: remaining exponent bits */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the complex reciprocal with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: multiply this power of x in */
+			if(u >>= 1) x *= x; /* more bits left: square the base for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) { /* complex x raised to the integer power n, by repeated squaring */
+	_Complex double pow=1.0; unsigned long int u; /* pow: result so far (1 when n == 0); u: remaining exponent bits */
+	if(n != 0) {
+		if(n < 0) n = -n, x = 1/x; /* negative exponent: use the complex reciprocal with |n| */
+		for(u = n; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: multiply this power of x in */
+			if(u >>= 1) x *= x; /* more bits left: square the base for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static integer pow_ii(integer x, integer n) { /* integer x raised to the integer power n, computed in integer arithmetic */
+	integer pow; unsigned long int u; /* pow is assigned on every path before the return */
+	if (n <= 0) { /* zero or negative exponent: only x == -1 still needs the loop */
+		if (n == 0 || x == 1) pow = 1;
+		else if (x != -1) pow = x == 0 ? 1/x : 0; /* n < 0: result 0, except x == 0 which evaluates 1/0 - NOTE(review): presumably a deliberate trap, confirm */
+		else n = -n; /* x == -1: the sign of n is irrelevant, let the loop pick +1 or -1 */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { /* second operand means x == -1 with n != 0; n was already negated above in that case */
+		u = n;
+		for(pow = 1; ; ) {
+			if(u & 01) pow *= x; /* current low bit set: multiply this power of x in */
+			if(u >>= 1) x *= x; /* more bits left: square the base for the next bit */
+			else break; /* exponent exhausted */
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) /* position of the largest of w[s-1..e-1]; n is not used */
+{
+	double m; integer i, mi; /* m: running maximum, mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first occurrence on ties */
+	return mi-s+1; /* result is 1-based relative to s, not to the start of w */
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n) /* single-precision twin of dmaxloc_; n is not used */
+{
+	float m; integer i, mi; /* m: running maximum, mi: its 1-based index in w */
+	for(m=w[s-1], mi=s, i=s+1; i<=e; i++) /* s and e are 1-based, inclusive bounds */
+		if (w[i-1]>m) mi=i ,m=w[i-1]; /* strict '>' keeps the first occurrence on ties */
+	return mi-s+1; /* result is 1-based relative to s, not to the start of w */
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of conjg(x(i)) * y(i), single precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i; /* scalar arguments arrive by reference */
+	_Complex float zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* contiguous fast path */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + conjg(x(i)) * y(i) */
+			zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
+		}
+	} else {
+		/* Reference BLAS convention: with a negative increment the walk starts at (1-n)*inc, so no element before the array is touched. */
+		integer ix = incx < 0 ? (1-n)*incx : 0, iy = incy < 0 ? (1-n)*incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) zdotc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc; /* store through the struct viewed as a native complex */
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of conjg(x(i)) * y(i), double precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i; /* scalar arguments arrive by reference */
+	_Complex double zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* contiguous fast path */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i)) * y(i) */
+			zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
+		}
+	} else {
+		/* Reference BLAS convention: with a negative increment the walk starts at (1-n)*inc, so no element before the array is touched. */
+		integer ix = incx < 0 ? (1-n)*incx : 0, iy = incy < 0 ? (1-n)*incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) zdotc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc; /* store through the struct viewed as a native complex */
+}	
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { /* *z = sum of x(i) * y(i), no conjugation, single precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i; /* scalar arguments arrive by reference */
+	_Complex float zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* contiguous fast path */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cf(&x[i]) * Cf(&y[i]);
+		}
+	} else {
+		/* Reference BLAS convention: with a negative increment the walk starts at (1-n)*inc, so no element before the array is touched. */
+		integer ix = incx < 0 ? (1-n)*incx : 0, iy = incy < 0 ? (1-n)*incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) zdotc += Cf(&x[ix]) * Cf(&y[iy]);
+	}
+	pCf(z) = zdotc; /* store through the struct viewed as a native complex */
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) { /* *z = sum of x(i) * y(i), no conjugation, double precision complex */
+	integer n = *n_, incx = *incx_, incy = *incy_, i; /* scalar arguments arrive by reference */
+	_Complex double zdotc = 0.0; /* accumulator; the result is 0 when n <= 0 */
+	if (incx == 1 && incy == 1) { /* contiguous fast path */
+		for (i=0;i<n;i++) { /* zdotc = zdotc + x(i) * y(i) */
+			zdotc += Cd(&x[i]) * Cd(&y[i]);
+		}
+	} else {
+		/* Reference BLAS convention: with a negative increment the walk starts at (1-n)*inc, so no element before the array is touched. */
+		integer ix = incx < 0 ? (1-n)*incx : 0, iy = incy < 0 ? (1-n)*incy : 0;
+		for (i=0;i<n;i++, ix+=incx, iy+=incy) zdotc += Cd(&x[ix]) * Cd(&y[iy]);
+	}
+	pCd(z) = zdotc; /* store through the struct viewed as a native complex */
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublereal c_b8 = -1.;
+
+/* > \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row
+ interchanges (unblocked algorithm). */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGETF2 + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGETF2 computes an LU factorization of a general m-by-n matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > The factorization has the form */
+/* >    A = P * L * U */
+/* > where P is a permutation matrix, L is lower triangular with unit */
+/* > diagonal elements (lower trapezoidal if m > n), and U is upper */
+/* > triangular (upper trapezoidal if m < n). */
+/* > */
+/* > This is the right-looking Level 2 BLAS version of the algorithm. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the m by n matrix to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (min(M,N)) */
+/* >          The pivot indices; for 1 <= i <= min(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0: successful exit */
+/* >          < 0: if INFO = -k, the k-th argument had an illegal value */
+/* >          > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
+/* >               has been completed, but the factor U is exactly */
+/* >               singular, and division by zero will occur if it is used */
+/* >               to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer *
+	lda, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
+	    integer *);
+    integer i__, j;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *);
+    doublereal sfmin;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    extern doublereal dlamch_(char *);
+    integer jp;
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+
+/*  Unblocked in-place LU factorization of the m-by-n matrix A with row interchanges. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset; /* after the shift a[i + j * a_dim1] addresses the 1-based entry A(i,j) */
+    --ipiv; /* ipiv is likewise indexed from 1 below */
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info); /* xerbla_ receives the positive position of the offending argument */
+	xerbla_("DGETF2", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Compute machine safe minimum */
+
+    sfmin = dlamch_("S");
+
+    i__1 = f2cmin(*m,*n);
+    for (j = 1; j <= i__1; ++j) {
+
+/*        Find pivot and test for singularity. */
+
+	i__2 = *m - j + 1; /* number of rows from the diagonal down */
+	jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); /* idamax_ result is treated as a 1-based position within rows j..m of column j */
+	ipiv[j] = jp;
+	if (a[jp + j * a_dim1] != 0.) {
+
+/*           Apply the interchange to columns 1:N. */
+
+	    if (jp != j) {
+		dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
+	    }
+
+/*           Compute elements J+1:M of J-th column. */
+
+	    if (j < *m) {
+		if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { /* pivot magnitude at least sfmin: scale by its reciprocal */
+		    i__2 = *m - j;
+		    d__1 = 1. / a[j + j * a_dim1];
+		    dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
+		} else { /* pivot below sfmin: divide element by element instead of forming 1/pivot */
+		    i__2 = *m - j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			a[j + i__ + j * a_dim1] /= a[j + j * a_dim1];
+/* L20: */
+		    }
+		}
+	    }
+
+	} else if (*info == 0) { /* only the first zero pivot is reported */
+
+	    *info = j;
+	}
+
+	if (j < f2cmin(*m,*n)) {
+
+/*           Update trailing submatrix. */
+
+	    i__2 = *m - j;
+	    i__3 = *n - j;
+	    dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + (
+		    j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda);
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of DGETF2 */
+
+} /* dgetf2_ */
+
diff --git a/lapack-netlib/SRC/dgetrf.c b/lapack-netlib/SRC/dgetrf.c
new file mode 100644
index 000000000..230573617
--- /dev/null
+++ b/lapack-netlib/SRC/dgetrf.c
@@ -0,0 +1,645 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer; /* scalar type aliases used by all translated code in this file */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex; /* real part r, imaginary part i */
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a native float complex */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* value of *z as a native double complex */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* same storage viewed as a native complex; assumes the struct and _Complex float share a layout - TODO confirm */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* double precision counterpart of _pCf */
+#define pCf(z) (*_pCf(z)) /* lvalue form: pCf(z) = value stores a native complex into *z */
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) /* double ** double; both operands are passed by pointer */
+#define pow_si(B,E) spow_ui(*(B),*(E)) /* float base, integer exponent */
+#define pow_ri(B,E) spow_ui(*(B),*(E)) /* float base, integer exponent */
+#define pow_di(B,E) dpow_ui(*(B),*(E)) /* double base, integer exponent */
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} /* *p = double complex base ** integer exponent */
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} /* *p = float complex base ** integer exponent */
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* *R = A ** B; B is converted with Cd() like A, since cpow() takes native complex values and not the struct */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* mymaxloc always forwards to dmaxloc_; s and e arrive as pointers and are dereferenced here. Parenthesized so the macros work as expressions. */
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static double dpow_ui(double x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	_Complex float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	_Complex double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {	/* integer power x**n computed entirely in integer arithmetic */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;	/* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0;	/* n < 0 here: result is 0 unless x == 0, which evaluates integer 1/0 -- NOTE(review): looks like a deliberate trap inherited from libf2c, confirm */
+		else n = -n;	/* x == -1: continue into the loop below with the negated exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {	/* the negated clause reduces to x == -1 with n != 0 */
+		u = n;
+		for(pow = 1; ; ) {	/* square-and-multiply over the bits of n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based offset from s of the first maximum in w(s..e); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based offset from s of the first maximum in w(s..e); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: stores sum_i conjg(x(i)) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += conjf(Cf(&x[kx + i*incx])) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: stores sum_i conjg(x(i)) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += conj(Cd(&x[kx + i*incx])) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: stores sum_i x(i) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += Cf(&x[kx + i*incx]) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: stores sum_i x(i) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += Cd(&x[kx + i*incx]) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static doublereal c_b16 = 1.;
+static doublereal c_b19 = -1.;
+
+/* > \brief \b DGETRF */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGETRF + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrf.f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrf.f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrf.f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGETRF computes an LU factorization of a general M-by-N matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > The factorization has the form */
+/* >    A = P * L * U */
+/* > where P is a permutation matrix, L is lower triangular with unit */
+/* > diagonal elements (lower trapezoidal if m > n), and U is upper */
+/* > triangular (upper trapezoidal if m < n). */
+/* > */
+/* > This is the right-looking Level 3 BLAS version of the algorithm. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (min(M,N)) */
+/* >          The pivot indices; for 1 <= i <= min(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, and division by zero will occur if it is used */
+/* >                to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer *
+	lda, integer *ipiv, integer *info)
+{
+    /* System generated locals: a_dim1/a_offset map 1-based subscripts onto a, i__N are scratch call arguments */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    integer i__, j;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *);
+    integer iinfo; /* INFO value written by each panel call to dgetrf2_ */
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer jb, nb; /* jb: width of the current panel; nb: block size returned by ilaenv_ */
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, 
+	    integer *, integer *, integer *, integer *), dgetrf2_(integer *, 
+	    integer *, doublereal *, integer *, integer *, integer *);
+
+/*  Blocked LU driver: each panel of jb columns is factored by dgetrf2_, its row */
+/*  interchanges are applied with dlaswp_, and dtrsm_/dgemm_ update the rest of A. */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     LAPACK computational routine (version 3.7.0), December 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments: shift a and ipiv so that a[i + j * a_dim1] and ipiv[i] accept 1-based i, j */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body: *info becomes minus the position of the first invalid argument */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info); /* xerbla_ receives the positive argument position */
+	xerbla_("DGETRF", &i__1, (ftnlen)6);
+	return 0;
+    }
+
+/*     Quick return if possible: an empty matrix needs no work and leaves *info at 0 */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    if (nb <= 1 || nb >= f2cmin(*m,*n)) {
+
+/*        Use unblocked code: a single dgetrf2_ call factors the whole matrix. */
+
+	dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code. */
+
+	i__1 = f2cmin(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* j = 1, 1+nb, ... while j <= min(m,n) */
+/* Computing MIN: panel width is nb, or whatever is left for the last panel */
+	    i__3 = f2cmin(*m,*n) - j + 1;
+	    jb = f2cmin(i__3,nb);
+
+/*           Factor diagonal and subdiagonal blocks and test for exact */
+/*           singularity. The panel is rows j..m of columns j..j+jb-1. */
+
+	    i__3 = *m - j + 1;
+	    dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
+
+/*           Adjust INFO and the pivot indices (only the first nonzero iinfo is kept). */
+
+	    if (*info == 0 && iinfo > 0) {
+		*info = iinfo + j - 1;
+	    }
+/* Computing MIN: last pivot entry written for this panel */
+	    i__4 = *m, i__5 = j + jb - 1;
+	    i__3 = f2cmin(i__4,i__5);
+	    for (i__ = j; i__ <= i__3; ++i__) {
+		ipiv[i__] = j - 1 + ipiv[i__]; /* shift the panel's pivot rows by j-1 to index rows of A */
+/* L10: */
+	    }
+
+/*           Apply interchanges to columns 1:J-1. */
+
+	    i__3 = j - 1;
+	    i__4 = j + jb - 1;
+	    dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
+
+	    if (j + jb <= *n) {
+
+/*              Apply interchanges to columns J+JB:N. */
+
+		i__3 = *n - j - jb + 1;
+		i__4 = j + jb - 1;
+		dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
+			ipiv[1], &c__1);
+
+/*              Compute block row of U (unit lower triangular solve, alpha = c_b16 = 1). */
+
+		i__3 = *n - j - jb + 1;
+		dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
+			c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * 
+			a_dim1], lda);
+		if (j + jb <= *m) {
+
+/*                 Update trailing submatrix: alpha = c_b19 = -1, beta = c_b16 = 1. */
+
+		    i__3 = *m - j - jb + 1;
+		    i__4 = *n - j - jb + 1;
+		    dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, 
+			    &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + 
+			    jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) *
+			     a_dim1], lda);
+		}
+	    }
+/* L20: */
+	}
+    }
+    return 0; /* the return value is always 0; status is reported through *info */
+
+/*     End of DGETRF */
+
+} /* dgetrf_ */
+
diff --git a/lapack-netlib/SRC/dgetrf2.c b/lapack-netlib/SRC/dgetrf2.c
new file mode 100644
index 000000000..e991d5d7f
--- /dev/null
+++ b/lapack-netlib/SRC/dgetrf2.c
@@ -0,0 +1,683 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} /* f2c complex struct -> C99 float complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} /* f2c doublecomplex struct -> C99 double complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} /* reinterprets the struct pointer; assumes {r,i} is laid out like float complex */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} /* same reinterpretation for doublecomplex */
+#define pCf(z) (*_pCf(z)) /* lvalue view of *z as float complex; the macros below assign through it */
+#define pCd(z) (*_pCd(z)) /* lvalue view of *z as double complex */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),Cd(B));} /* R = A**B; the exponent is converted with Cd() like the base because cpow takes two double complex values */
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+/* mymaxloc always forwards to dmaxloc_; s and e arrive as pointers and are dereferenced here. Parenthesized so the macros work as expressions. */
+#define mymaxloc(w,s,e,n) (dmaxloc_((w),*(s),*(e),(n)))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+static float spow_ui(float x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static double dpow_ui(double x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	_Complex float acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	/* Integer power x**n by square-and-multiply over the bits of |n|. */
+	_Complex double acc = 1.0; unsigned long int bits;
+	if (n == 0) return acc;
+	/* A negative exponent is handled as (1/x)**(-n). */
+	if (n < 0) { n = -n; x = 1/x; }
+	for (bits = n; ; x *= x) {
+		if (bits & 01) acc *= x;
+		if (!(bits >>= 1)) break;	/* no bits left: skip the last squaring */
+	}
+	return acc;
+}
+static integer pow_ii(integer x, integer n) {	/* integer power x**n computed entirely in integer arithmetic */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;	/* x**0 and 1**n are both 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0;	/* n < 0 here: result is 0 unless x == 0, which evaluates integer 1/0 -- NOTE(review): looks like a deliberate trap inherited from libf2c, confirm */
+		else n = -n;	/* x == -1: continue into the loop below with the negated exponent */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {	/* the negated clause reduces to x == -1 with n != 0 */
+		u = n;
+		for(pow = 1; ; ) {	/* square-and-multiply over the bits of n */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{	/* 1-based offset from s of the first maximum in w(s..e); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{	/* 1-based offset from s of the first maximum in w(s..e); n is not used. */
+	integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k-1] > w[best-1]) best = k;
+	return best - s + 1;
+}
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Conjugated dot product: stores sum_i conjg(x(i)) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += conjf(Cf(&x[kx + i*incx])) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Conjugated dot product: stores sum_i conjg(x(i)) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += conj(Cd(&x[kx + i*incx])) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	/* Unconjugated dot product: stores sum_i x(i) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex float zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += Cf(&x[kx + i*incx]) * Cf(&y[ky + i*incy]);
+	}
+	pCf(z) = zdotc;
+}
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	/* Unconjugated dot product: stores sum_i x(i) * y(i) in *z (zero when n <= 0). */
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Reference BLAS reads a vector with a negative increment back to front,
+	   starting at element (1-n)*inc; plain i*inc indexing from 0 would step
+	   in front of the array, so each vector gets its own start offset. */
+	integer kx = incx < 0 ? (1 - n) * incx : 0;
+	integer ky = incy < 0 ? (1 - n) * incy : 0;
+	_Complex double zdotc = 0.0;
+	for (i=0;i<n;i++) {
+		zdotc += Cd(&x[kx + i*incx]) * Cd(&y[ky + i*incy]);
+	}
+	pCd(z) = zdotc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublereal c_b13 = 1.;
+static doublereal c_b16 = -1.;
+
+/* > \brief \b DGETRF2 */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/*  Definition: */
+/*  =========== */
+
+/*        SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) */
+
+/*       INTEGER            INFO, LDA, M, N */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGETRF2 computes an LU factorization of a general M-by-N matrix A */
+/* > using partial pivoting with row interchanges. */
+/* > */
+/* > The factorization has the form */
+/* >    A = P * L * U */
+/* > where P is a permutation matrix, L is lower triangular with unit */
+/* > diagonal elements (lower trapezoidal if m > n), and U is upper */
+/* > triangular (upper trapezoidal if m < n). */
+/* > */
+/* > This is the recursive version of the algorithm. It divides */
+/* > the matrix into four submatrices: */
+/* > */
+/* >        [  A11 | A12  ]  where A11 is n1 by n1 and A22 is (m-n1) by n2 */
+/* >    A = [ -----|----- ]  with n1 = min(m,n)/2 */
+/* >        [  A21 | A22  ]       n2 = n-n1 */
+/* > */
+/* >                                       [ A11 ] */
+/* > The subroutine calls itself to factor [ --- ], */
+/* >                                       [ A21 ] */
+/* >                 [ A12 ] */
+/* > do the swaps on [ --- ], solve A12, update A22, */
+/* >                 [ A22 ] */
+/* > */
+/* > then calls itself to factor A22 and do the swaps on A21. */
+/* > */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix to be factored. */
+/* >          On exit, the factors L and U from the factorization */
+/* >          A = P*L*U; the unit diagonal elements of L are not stored. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (min(M,N)) */
+/* >          The pivot indices; for 1 <= i <= min(M,N), row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
+/* >                has been completed, but the factor U is exactly */
+/* >                singular, and division by zero will occur if it is used */
+/* >                to solve a system of equations. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgetrf2_(integer *m, integer *n, doublereal *a, integer *
+	lda, integer *ipiv, integer *info)
+{
+    /* System generated locals: a_dim1/a_offset map 1-based subscripts onto a, i__N and d__1 are scratch values */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Local variables */
+    doublereal temp;
+    integer i__;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
+	    integer *), dgemm_(char *, char *, integer *, integer *, integer *
+	    , doublereal *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, doublereal *, integer *);
+    integer iinfo; /* INFO value written by each recursive call */
+    doublereal sfmin;
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer n1, n2; /* column counts of the left and right halves of the split */
+    extern doublereal dlamch_(char *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dlaswp_(
+	    integer *, doublereal *, integer *, integer *, integer *, integer 
+	    *, integer *);
+
+/*  Recursive LU: one-row and one-column inputs are handled directly, anything else is split by columns. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */
+/*     June 2016 */
+
+
+/*  ===================================================================== */
+
+
+/*     Test the input parameters */
+
+    /* Parameter adjustments: shift a and ipiv so that a[i + j * a_dim1] and ipiv[i] accept 1-based i, j */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body: *info becomes minus the position of the first invalid argument */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info); /* xerbla_ receives the positive argument position */
+	xerbla_("DGETRF2", &i__1, (ftnlen)7);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m == 0 || *n == 0) {
+	return 0;
+    }
+    if (*m == 1) {
+
+/*        Use unblocked code for one row case */
+/*        Just need to handle IPIV and INFO: no row can be swapped, a zero A(1,1) gives INFO = 1 */
+
+	ipiv[1] = 1;
+	if (a[a_dim1 + 1] == 0.) {
+	    *info = 1;
+	}
+
+    } else if (*n == 1) {
+
+/*        Use unblocked code for one column case */
+
+
+/*        Compute machine safe minimum */
+
+	sfmin = dlamch_("S");
+
+/*        Find pivot and test for singularity (pivot row is chosen by idamax_) */
+
+	i__ = idamax_(m, &a[a_dim1 + 1], &c__1);
+	ipiv[1] = i__;
+	if (a[i__ + a_dim1] != 0.) {
+
+/*           Apply the interchange */
+
+	    if (i__ != 1) {
+		temp = a[a_dim1 + 1];
+		a[a_dim1 + 1] = a[i__ + a_dim1];
+		a[i__ + a_dim1] = temp;
+	    }
+
+/*           Compute elements 2:M of the column: scale by 1/pivot when |pivot| >= sfmin, otherwise divide entry by entry */
+
+	    if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) {
+		i__1 = *m - 1;
+		d__1 = 1. / a[a_dim1 + 1];
+		dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1);
+	    } else {
+		i__1 = *m - 1;
+		for (i__ = 1; i__ <= i__1; ++i__) { /* i__ is reused as a counter; the pivot row is already in ipiv[1] */
+		    a[i__ + 1 + a_dim1] /= a[a_dim1 + 1];
+/* L10: */
+		}
+	    }
+
+	} else {
+	    *info = 1;
+	}
+
+    } else {
+
+/*        Use recursive code: split the columns into n1 on the left and n2 on the right */
+
+	n1 = f2cmin(*m,*n) / 2;
+	n2 = *n - n1;
+
+/*               [ A11 ] */
+/*        Factor [ --- ] */
+/*               [ A21 ] */
+
+	dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo);
+	if (*info == 0 && iinfo > 0) {
+	    *info = iinfo;
+	}
+
+/*                              [ A12 ] */
+/*        Apply interchanges to [ --- ] */
+/*                              [ A22 ] */
+
+	dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &
+		c__1);
+
+/*        Solve A12 (unit lower triangular solve with A11, alpha = c_b13 = 1) */
+
+	dtrsm_("L", "L", "N", "U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[(
+		n1 + 1) * a_dim1 + 1], lda);
+
+/*        Update A22: alpha = c_b16 = -1 and beta = c_b13 = 1 in the dgemm_ call */
+
+	i__1 = *m - n1;
+	dgemm_("N", "N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda, &
+		a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * 
+		a_dim1], lda);
+
+/*        Factor A22 (rows n1+1..m, columns n1+1..n) */
+
+	i__1 = *m - n1;
+	dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 
+		1], &iinfo);
+
+/*        Adjust INFO and the pivot indices (both are shifted by n1) */
+
+	if (*info == 0 && iinfo > 0) {
+	    *info = iinfo + n1;
+	}
+	i__1 = f2cmin(*m,*n);
+	for (i__ = n1 + 1; i__ <= i__1; ++i__) {
+	    ipiv[i__] += n1;
+/* L20: */
+	}
+
+/*        Apply interchanges to A21 (pivots n1+1..min(m,n) on the first n1 columns) */
+
+	i__1 = n1 + 1;
+	i__2 = f2cmin(*m,*n);
+	dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1);
+
+    }
+    return 0; /* the return value is always 0; status is reported through *info */
+
+/*     End of DGETRF2 */
+
+} /* dgetrf2_ */
+
diff --git a/lapack-netlib/SRC/dgetri.c b/lapack-netlib/SRC/dgetri.c
new file mode 100644
index 000000000..a8d223285
--- /dev/null
+++ b/lapack-netlib/SRC/dgetri.c
@@ -0,0 +1,694 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+/* Value-producing helpers below expand to parenthesised expressions so they can be assigned or compared. */
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0f;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Complex x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Complex x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Integer power x**n; a negative n follows integer-division semantics (result 0 when |x| >= 2). */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return 1;	/* x**0 and 1**n are always 1 */
+	if (n < 0) {
+		/* x == 0 keeps the original 1/x division; any other base except -1 truncates to 0. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* (-1)**(-k) equals (-1)**k */
+	}
+	/* Square-and-multiply over the bits of the (now positive) exponent. */
+	for (bits = n; ; ) {
+		if (bits & 01) result *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	double top = w[s - 1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k - 1] > top) { best = k; top = w[k - 1]; }
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	float top = w[s - 1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k - 1] > top) { best = k; top = w[k - 1]; }
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements taken with strides incx, incy. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements taken with strides incx, incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with strides incx, incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with strides incx, incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static integer c_n1 = -1;
+static integer c__2 = 2;
+static doublereal c_b20 = -1.;
+static doublereal c_b22 = 1.;
+
+/* > \brief \b DGETRI */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGETRI + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetri.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetri.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetri.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */
+
+/*       INTEGER            INFO, LDA, LWORK, N */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGETRI computes the inverse of a matrix using the LU factorization */
+/* > computed by DGETRF. */
+/* > */
+/* > This method inverts U and then computes inv(A) by solving the system */
+/* > inv(A)*L = inv(U) for inv(A). */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the factors L and U from the factorization */
+/* >          A = P*L*U as computed by DGETRF. */
+/* >          On exit, if INFO = 0, the inverse of the original matrix A. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices from DGETRF; for 1<=i<=N, row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK.  LWORK >= max(1,N). */
+/* >          For optimal performance LWORK >= N*NB, where NB is */
+/* >          the optimal blocksize returned by ILAENV. */
+/* > */
+/* >          If LWORK = -1, then a workspace query is assumed; the routine */
+/* >          only calculates the optimal size of the WORK array, returns */
+/* >          this value as the first entry of the WORK array, and no error */
+/* >          message related to LWORK is issued by XERBLA. */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is */
+/* >                singular and its inverse could not be computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgetri_(integer *n, doublereal *a, integer *lda, integer 
+	*ipiv, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    integer i__, j;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
+	    integer *, doublereal *, doublereal *, integer *),
+	     dgemv_(char *, integer *, integer *, doublereal *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
+	    integer *);
+    integer nbmin;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, 
+	    doublereal *, integer *), dtrsm_(char *, char *, char *, char *, 
+	    integer *, integer *, doublereal *, doublereal *, integer *, 
+	    doublereal *, integer *);
+    integer jb, nb, jj, jp, nn;
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+	    integer *, integer *, ftnlen, ftnlen);
+    integer ldwork;
+    extern /* Subroutine */ int dtrtri_(char *, char *, integer *, doublereal 
+	    *, integer *, integer *);
+    integer lwkopt;
+    logical lquery;
+    integer iws;
+
+/*  Purpose: overwrite the LU factors held in A with the inverse of the original matrix. */
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+/*  INFO = 0: success; INFO = -i: argument i is illegal (reported through XERBLA); */
+/*  INFO > 0: value set by DTRTRI, after which the routine returns without inverting. */
+/*  ===================================================================== */
+/*  WORK(1) returns a workspace size; LWORK = -1 requests only that size. */
+
+/*     Test the input parameters. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    --ipiv;
+    --work;
+    /* The pointer shifts above let the body use Fortran-style 1-based subscripts. */
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "DGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    lwkopt = f2cmax(1, *n * nb);	/* never report 0: LWORK >= max(1,N) is required even when N = 0 */
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*lda < f2cmax(1,*n)) {
+	*info = -3;
+    } else if (*lwork < f2cmax(1,*n) && ! lquery) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGETRI", &i__1, (ftnlen)6);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Form inv(U).  If INFO > 0 from DTRTRI, then U is singular, */
+/*     and the inverse is not computed. */
+
+    dtrtri_("Upper", "Non-unit", n, &a[a_offset], lda, info);
+    if (*info > 0) {
+	return 0;
+    }
+/*     Reduce the block size when the supplied workspace cannot hold N*NB entries. */
+    nbmin = 2;
+    ldwork = *n;
+    if (nb > 1 && nb < *n) {
+/* Computing MAX */
+	i__1 = ldwork * nb;
+	iws = f2cmax(i__1,1);
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/* Computing MAX */
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "DGETRI", " ", n, &c_n1, &c_n1, &
+		    c_n1, (ftnlen)6, (ftnlen)1);
+	    nbmin = f2cmax(i__1,i__2);
+	}
+    } else {
+	iws = *n;
+    }
+
+/*     Solve the equation inv(A)*L = inv(U) for inv(A). */
+
+    if (nb < nbmin || nb >= *n) {
+
+/*        Use unblocked code. */
+
+	for (j = *n; j >= 1; --j) {
+
+/*           Copy current column of L to WORK and replace with zeros. */
+
+	    i__1 = *n;
+	    for (i__ = j + 1; i__ <= i__1; ++i__) {
+		work[i__] = a[i__ + j * a_dim1];
+		a[i__ + j * a_dim1] = 0.;
+/* L10: */
+	    }
+
+/*           Compute current column of inv(A). */
+
+	    if (j < *n) {
+		i__1 = *n - j;
+		dgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 
+			+ 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 
+			+ 1], &c__1);
+	    }
+/* L20: */
+	}
+    } else {
+
+/*        Use blocked code. */
+
+	nn = (*n - 1) / nb * nb + 1;
+	i__1 = -nb;
+	for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *n - j + 1;
+	    jb = f2cmin(i__2,i__3);
+
+/*           Copy current block column of L to WORK and replace with */
+/*           zeros. */
+
+	    i__2 = j + jb - 1;
+	    for (jj = j; jj <= i__2; ++jj) {
+		i__3 = *n;
+		for (i__ = jj + 1; i__ <= i__3; ++i__) {
+		    work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
+		    a[i__ + jj * a_dim1] = 0.;
+/* L30: */
+		}
+/* L40: */
+	    }
+
+/*           Compute current block column of inv(A). */
+
+	    if (j + jb <= *n) {
+		i__2 = *n - j - jb + 1;
+		dgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, 
+			&a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &
+			ldwork, &c_b22, &a[j * a_dim1 + 1], lda);
+	    }
+	    dtrsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, &
+		    work[j], &ldwork, &a[j * a_dim1 + 1], lda);
+/* L50: */
+	}
+    }
+
+/*     Apply column interchanges. */
+
+    for (j = *n - 1; j >= 1; --j) {
+	jp = ipiv[j];
+	if (jp != j) {
+	    dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
+	}
+/* L60: */
+    }
+/*     Return the workspace size computed above in WORK(1). */
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DGETRI */
+
+} /* dgetri_ */
+
diff --git a/lapack-netlib/SRC/dgetrs.c b/lapack-netlib/SRC/dgetrs.c
new file mode 100644
index 000000000..c58328d29
--- /dev/null
+++ b/lapack-netlib/SRC/dgetrs.c
@@ -0,0 +1,620 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
+#define pCf(z) (*_pCf(z))
+#define pCd(z) (*_pCd(z))
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+/* Value-producing helpers below expand to parenthesised expressions so they can be assigned or compared. */
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) (ceil(w))
+#define myhuge(w) (HUGE_VAL)
+#define mymaxloc(w,s,e,n) (dmaxloc_(w,*(s),*(e),n))
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0f;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Complex x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Complex x**n for an integer exponent n, by binary (square-and-multiply) exponentiation. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* Form |n| in unsigned arithmetic: negating the most negative integer as a signed value is undefined. */
+	unsigned long int bits = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;	/* negative exponent: raise the reciprocal instead */
+	for (; bits != 0; bits >>= 1) {
+		if (bits & 1UL) result *= x;
+		if (bits >> 1) x *= x;	/* square only while higher exponent bits remain */
+	}
+	return result;
+}
+/* Integer power x**n; a negative n follows integer-division semantics (result 0 when |x| >= 2). */
+static integer pow_ii(integer x, integer n) {
+	integer result = 1; unsigned long int bits;
+	if (n == 0 || x == 1) return 1;	/* x**0 and 1**n are always 1 */
+	if (n < 0) {
+		/* x == 0 keeps the original 1/x division; any other base except -1 truncates to 0. */
+		if (x != -1) return x == 0 ? 1/x : 0;
+		n = -n;	/* (-1)**(-k) equals (-1)**k */
+	}
+	/* Square-and-multiply over the bits of the (now positive) exponent. */
+	for (bits = n; ; ) {
+		if (bits & 01) result *= x;
+		if (bits >>= 1) x *= x;
+		else break;
+	}
+	return result;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n) {
+	double top = w[s - 1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k - 1] > top) { best = k; top = w[k - 1]; }
+	return best - s + 1;
+}
+/* Position (1-based, counted from s) of the first largest entry among w[s-1..e-1]; n is not used. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n) {
+	float top = w[s - 1]; integer best = s, k;
+	for (k = s + 1; k <= e; ++k)
+		if (w[k - 1] > top) { best = k; top = w[k - 1]; }
+	return best - s + 1;
+}
+/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements taken with strides incx, incy. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conjf(Cf(&x[ix])) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Conjugated dot product: *z = sum of conj(x(i)) * y(i) over n elements taken with strides incx, incy. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += conj(Cd(&x[ix])) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with strides incx, incy. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cf(&x[ix]) * Cf(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated dot product: *z = sum of x(i) * y(i) over n elements taken with strides incx, incy. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* BLAS convention: a negative increment traverses the vector backwards, starting */
+	/* at offset (1-n)*inc, so that no index falls in front of the array. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		acc += Cd(&x[ix]) * Cd(&y[iy]);
+		ix += incx; iy += incy;
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublereal c_b12 = 1.;
+static integer c_n1 = -1;
+
+/* > \brief \b DGETRS */
+
+/*  =========== DOCUMENTATION =========== */
+
+/* Online html documentation available at */
+/*            http://www.netlib.org/lapack/explore-html/ */
+
+/* > \htmlonly */
+/* > Download DGETRS + dependencies */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.
+f"> */
+/* > [TGZ]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.
+f"> */
+/* > [ZIP]</a> */
+/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.
+f"> */
+/* > [TXT]</a> */
+/* > \endhtmlonly */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDB, N, NRHS */
+/*       INTEGER            IPIV( * ) */
+/*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGETRS solves a system of linear equations */
+/* >    A * X = B  or  A**T * X = B */
+/* > with a general N-by-N matrix A using the LU factorization computed */
+/* > by DGETRF. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          Specifies the form of the system of equations: */
+/* >          = 'N':  A * X = B  (No transpose) */
+/* >          = 'T':  A**T* X = B  (Transpose) */
+/* >          = 'C':  A**T* X = B  (Conjugate transpose = Transpose) */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The order of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of columns */
+/* >          of the matrix B.  NRHS >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          The factors L and U from the factorization A = P*L*U */
+/* >          as computed by DGETRF. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[in] IPIV */
+/* > \verbatim */
+/* >          IPIV is INTEGER array, dimension (N) */
+/* >          The pivot indices from DGETRF; for 1<=i<=N, row i of the */
+/* >          matrix was interchanged with row IPIV(i). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the right hand side matrix B. */
+/* >          On exit, the solution matrix X. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B.  LDB >= max(1,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date December 2016 */
+
+/* > \ingroup doubleGEcomputational */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, 
+	doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *
+	ldb, integer *info)
+{
+    /* External routines */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
+    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *,
+	    integer *, integer *, integer *, integer *);
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *);
+
+    /* Local variables */
+    logical no_transpose;	/* true when TRANS selects A * X = B */
+    integer bad_arg = 0;	/* 1-based position of the first illegal argument */
+
+
+/*  -- LAPACK computational routine (version 3.7.0) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     December 2016 */
+
+
+/*  ===================================================================== */
+
+/*     A, B and IPIV are handed to DLASWP and DTRSM exactly as received; */
+/*     this routine never indexes into them itself, so no 1-based offset */
+/*     adjustment of the pointers is required. */
+
+/*     Check the arguments in order; the first illegal one is reported. */
+
+    *info = 0;
+    no_transpose = lsame_(trans, "N");
+    if (! (no_transpose || lsame_(trans, "T") || lsame_(trans, "C"))) {
+	bad_arg = 1;
+    } else if (*n < 0) {
+	bad_arg = 2;
+    } else if (*nrhs < 0) {
+	bad_arg = 3;
+    } else if (*lda < f2cmax(1,*n)) {
+	bad_arg = 5;
+    } else if (*ldb < f2cmax(1,*n)) {
+	bad_arg = 8;
+    }
+
+    if (bad_arg != 0) {
+
+/*        INFO carries the negated argument position; XERBLA receives */
+/*        the position itself. */
+
+	*info = -bad_arg;
+	xerbla_("DGETRS", &bad_arg, (ftnlen)6);
+	return 0;
+    }
+
+/*     An empty system or no right hand sides: nothing to compute. */
+
+    if (*n == 0 || *nrhs == 0) {
+	return 0;
+    }
+
+    if (! no_transpose) {
+
+/*        Transposed system A**T * X = B. */
+
+/*        Solve U**T * X = B, overwriting B with X. */
+
+	dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, a,
+		lda, b, ldb);
+
+/*        Solve L**T * X = B, overwriting B with X. */
+
+	dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, a, lda,
+		b, ldb);
+
+/*        Row interchanges on the solution (last DLASWP argument is -1). */
+
+	dlaswp_(nrhs, b, ldb, &c__1, n, ipiv, &c_n1);
+	return 0;
+    }
+
+/*     Untransposed system A * X = B. */
+
+/*     Row interchanges on the right hand sides (last argument is +1). */
+
+    dlaswp_(nrhs, b, ldb, &c__1, n, ipiv, &c__1);
+
+/*     Solve L * X = B with the unit lower triangle, overwriting B. */
+
+    dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, a, lda,
+	    b, ldb);
+
+/*     Solve U * X = B with the upper triangle, overwriting B. */
+
+    dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, a,
+	    lda, b, ldb);
+
+/*     B now holds the solution X. */
+
+    return 0;
+
+/*     End of DGETRS */
+
+} /* dgetrs_ */
+
diff --git a/lapack-netlib/SRC/dgetsls.c b/lapack-netlib/SRC/dgetsls.c
new file mode 100644
index 000000000..db3c76926
--- /dev/null
+++ b/lapack-netlib/SRC/dgetsls.c
@@ -0,0 +1,931 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <complex.h>
+#ifdef complex
+#undef complex
+#endif
+#ifdef I
+#undef I
+#endif
+
+typedef int integer;	/* integer type used throughout the translated code */
+typedef unsigned int uinteger;
+typedef char *address;
+typedef short int shortint;
+typedef float real;	/* single precision */
+typedef double doublereal;	/* double precision */
+typedef struct { real r, i; } complex;	/* single precision complex as a {re, im} pair */
+typedef struct { doublereal r, i; } doublecomplex;	/* double precision complex as a {re, im} pair */
+static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}	/* struct -> C99 complex value */
+static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}	/* views the struct storage as a C99 complex; assumes matching layout */
+static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}	/* views the struct storage as a C99 complex; assumes matching layout */
+#define pCf(z) (*_pCf(z))	/* assignable C99 complex lvalue for a complex struct */
+#define pCd(z) (*_pCd(z))	/* assignable C99 complex lvalue for a doublecomplex struct */
+typedef int logical;	/* truth values, see TRUE_ / FALSE_ below */
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain `extern` unless already defined. */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: scalar types and argument blocks, one struct per kind of I/O statement. */
+/* Of this section, dgetsls_ below uses only ftnlen (the xerbla_ name length). */
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+
+/* cilist: external read, write */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* icilist: internal read, write */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* olist: open */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* cllist: close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* alist: rewind, backspace, endfile */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inlist: inquire; each char* result field is followed by its length field */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points: one member per scalar type defined above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+struct Vardesc {	/* for Namelist: describes one variable (name, address, dims, type code) */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {	/* a named group of nvars Vardesc entries */
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#define dabs(x) (fabs(x))
+#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
+#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
+#define dmin(a,b) (f2cmin(a,b))
+#define dmax(a,b) (f2cmax(a,b))
+#define bit_test(a,b)	((a) >> (b) & 1)
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+
+#define abort_() { sig_die("Fortran abort routine called", 1); }
+#define c_abs(z) (cabsf(Cf(z)))
+#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
+#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
+#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
+#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
+#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
+#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
+//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
+#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
+#define d_abs(x) (fabs(*(x)))
+#define d_acos(x) (acos(*(x)))
+#define d_asin(x) (asin(*(x)))
+#define d_atan(x) (atan(*(x)))
+#define d_atn2(x, y) (atan2(*(x),*(y)))
+#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
+#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); }
+#define d_cos(x) (cos(*(x)))
+#define d_cosh(x) (cosh(*(x)))
+#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
+#define d_exp(x) (exp(*(x)))
+#define d_imag(z) (cimag(Cd(z)))
+#define r_imag(z) (cimag(Cf(z)))
+#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
+#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
+#define d_log(x) (log(*(x)))
+#define d_mod(x, y) (fmod(*(x), *(y)))
+#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
+#define d_nint(x) u_nint(*(x))
+#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
+#define d_sign(a,b) u_sign(*(a),*(b))
+#define r_sign(a,b) u_sign(*(a),*(b))
+#define d_sin(x) (sin(*(x)))
+#define d_sinh(x) (sinh(*(x)))
+#define d_sqrt(x) (sqrt(*(x)))
+#define d_tan(x) (tan(*(x)))
+#define d_tanh(x) (tanh(*(x)))
+#define i_abs(x) abs(*(x))
+#define i_dnnt(x) ((integer)u_nint(*(x)))
+#define i_len(s, n) (n)
+#define i_nint(x) ((integer)u_nint(*(x)))
+#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
+#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
+#define pow_si(B,E) spow_ui(*(B),*(E))
+#define pow_ri(B,E) spow_ui(*(B),*(E))
+#define pow_di(B,E) dpow_ui(*(B),*(E))
+#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
+#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
+#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
+#define s_cat(lpp, rpp, rnp, np, llp) { 	ftnlen i, nc, ll; char *f__rp, *lp; 	ll = (llp); lp = (lpp); 	for(i=0; i < (int)*(np); ++i) {         	nc = ll; 	        if((rnp)[i] < nc) nc = (rnp)[i]; 	        ll -= nc;         	f__rp = (rpp)[i]; 	        while(--nc >= 0) *lp++ = *(f__rp)++;         } 	while(--ll >= 0) *lp++ = ' '; }
+#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
+#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
+#define sig_die(s, kill) { exit(1); }
+#define s_stop(s, n) {exit(0);}
+static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
+#define z_abs(z) (cabs(Cd(z)))
+#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
+#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
+#define myexit_() break;
+#define mycycle() continue;
+#define myceiling(w) {ceil(w)}
+#define myhuge(w) {HUGE_VAL}
+//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
+#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef logical (*L_fp)(...);
+#else
+typedef logical (*L_fp)();
+#endif
+
+/* x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static float spow_ui(float x, integer n) {
+	float result = 1.0f;
+	/* |n| is formed in unsigned arithmetic so the most negative exponent cannot overflow. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static double dpow_ui(double x, integer n) {
+	double result = 1.0;
+	/* |n| is formed in unsigned arithmetic so the most negative exponent cannot overflow. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Complex x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static _Complex float cpow_ui(_Complex float x, integer n) {
+	_Complex float result = 1.0;
+	/* |n| is formed in unsigned arithmetic so the most negative exponent cannot overflow. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+/* Complex x**n by repeated squaring; a negative n yields (1/x)**|n|. */
+static _Complex double zpow_ui(_Complex double x, integer n) {
+	_Complex double result = 1.0;
+	/* |n| is formed in unsigned arithmetic so the most negative exponent cannot overflow. */
+	unsigned long int u = n < 0 ? 0UL - (unsigned long int)n : (unsigned long int)n;
+	if (n < 0) x = 1/x;
+	for (; u != 0; u >>= 1) {
+		if (u & 01) result *= x;
+		if (u >> 1) x *= x;	/* square only while higher bits remain */
+	}
+	return result;
+}
+static integer pow_ii(integer x, integer n) {	/* integer power x**n, computed entirely in `integer` arithmetic */
+	integer pow; unsigned long int u;
+	if (n <= 0) {
+		if (n == 0 || x == 1) pow = 1;	/* zero exponent or base 1: result is 1 */
+		else if (x != -1) pow = x == 0 ? 1/x : 0;	/* negative exponent: 0, except x == 0 which evaluates 1/x (integer division by zero) */
+		else n = -n;	/* x == -1: continue with |n| in the loop below */
+	}
+	if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {	/* NOTE(review): the second clause looks unreachable whenever n <= 0 at this point -- confirm */
+		u = n;
+		for(pow = 1; ; ) {	/* repeated squaring over the bits of u */
+			if(u & 01) pow *= x;
+			if(u >>= 1) x *= x;
+			else break;
+		}
+	}
+	return pow;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w(s..e); n is not read. */
+static integer dmaxloc_(double *w, integer s, integer e, integer *n)
+{
+	integer best = s, k; double top = w[s-1];
+	for (k = s + 1; k <= e; ++k) { if (w[k-1] > top) { best = k; top = w[k-1]; } }
+	return best - s + 1;
+}
+/* Position, counted from 1 at index s, of the first largest entry among w(s..e); n is not read. */
+static integer smaxloc_(float *w, integer s, integer e, integer *n)
+{
+	integer best = s, k; float top = w[s-1];
+	for (k = s + 1; k <= e; ++k) { if (w[k-1] > top) { best = k; top = w[k-1]; } }
+	return best - s + 1;
+}
+/* Conjugated complex dot product: *z = sum over i of conjg(x(i)) * y(i), single precision. */
+/* A negative stride starts at the far end of that vector, as in the reference BLAS. */
+static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Zero-based index of the first element visited in each vector. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + conjg(x(i)) * y(i) */
+		acc += conjf(Cf(&x[ix + i*incx])) * Cf(&y[iy + i*incy]);
+	}
+	pCf(z) = acc;
+}
+/* Conjugated complex dot product: *z = sum over i of conjg(x(i)) * y(i), double precision. */
+/* A negative stride starts at the far end of that vector, as in the reference BLAS. */
+static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Zero-based index of the first element visited in each vector. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + conjg(x(i)) * y(i) */
+		acc += conj(Cd(&x[ix + i*incx])) * Cd(&y[iy + i*incy]);
+	}
+	pCd(z) = acc;
+}
+/* Unconjugated complex dot product: *z = sum over i of x(i) * y(i); no operand is conjugated. */
+/* A negative stride starts at the far end of that vector, as in the reference BLAS. */
+static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Zero-based index of the first element visited in each vector. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex float acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + x(i) * y(i) */
+		acc += Cf(&x[ix + i*incx]) * Cf(&y[iy + i*incy]);
+	}
+	pCf(z) = acc;
+}
+/* Unconjugated complex dot product: *z = sum over i of x(i) * y(i); no operand is conjugated. */
+/* A negative stride starts at the far end of that vector, as in the reference BLAS. */
+static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
+	integer n = *n_, incx = *incx_, incy = *incy_, i;
+	/* Zero-based index of the first element visited in each vector. */
+	integer ix = (incx < 0 && n > 0) ? (1 - n) * incx : 0;
+	integer iy = (incy < 0 && n > 0) ? (1 - n) * incy : 0;
+	_Complex double acc = 0.0;
+	for (i = 0; i < n; i++) {
+		/* acc = acc + x(i) * y(i) */
+		acc += Cd(&x[ix + i*incx]) * Cd(&y[iy + i*incy]);
+	}
+	pCd(z) = acc;
+}
+#endif
+/*  -- translated by f2c (version 20000121).
+   You must link the resulting object file with the libraries:
+	-lf2c -lm   (in that order)
+*/
+
+
+
+/* Table of constant values */
+
+static integer c_n1 = -1;	/* size argument -1 for the dgeqr_/dgelq_/dgemqr_/dgemlq_ queries (presumably "optimal size" -- confirm) */
+static integer c_n2 = -2;	/* size argument -2 for the dgeqr_/dgelq_ queries (presumably "minimal size" -- confirm) */
+static doublereal c_b23 = 0.;	/* value passed twice to dlaset_ when B is zeroed */
+static integer c__0 = 0;	/* 2nd and 3rd argument of every dlascl_ call */
+
+/* > \brief \b DGETSLS */
+
+/*  Definition: */
+/*  =========== */
+
+/*       SUBROUTINE DGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */
+/*     $                     WORK, LWORK, INFO ) */
+
+/*       CHARACTER          TRANS */
+/*       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS */
+/*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * ) */
+
+
+/* > \par Purpose: */
+/*  ============= */
+/* > */
+/* > \verbatim */
+/* > */
+/* > DGETSLS solves overdetermined or underdetermined real linear systems */
+/* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */
+/* > factorization of A.  It is assumed that A has full rank. */
+/* > */
+/* > */
+/* > */
+/* > The following options are provided: */
+/* > */
+/* > 1. If TRANS = 'N' and m >= n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A*X ||. */
+/* > */
+/* > 2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
+/* >    an underdetermined system A * X = B. */
+/* > */
+/* > 3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
+/* >    an underdetermined system A**T * X = B. */
+/* > */
+/* > 4. If TRANS = 'T' and m < n:  find the least squares solution of */
+/* >    an overdetermined system, i.e., solve the least squares problem */
+/* >                 minimize || B - A**T * X ||. */
+/* > */
+/* > Several right hand side vectors b and solution vectors x can be */
+/* > handled in a single call; they are stored as the columns of the */
+/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
+/* > matrix X. */
+/* > \endverbatim */
+
+/*  Arguments: */
+/*  ========== */
+
+/* > \param[in] TRANS */
+/* > \verbatim */
+/* >          TRANS is CHARACTER*1 */
+/* >          = 'N': the linear system involves A; */
+/* >          = 'T': the linear system involves A**T. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] M */
+/* > \verbatim */
+/* >          M is INTEGER */
+/* >          The number of rows of the matrix A.  M >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] N */
+/* > \verbatim */
+/* >          N is INTEGER */
+/* >          The number of columns of the matrix A.  N >= 0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] NRHS */
+/* > \verbatim */
+/* >          NRHS is INTEGER */
+/* >          The number of right hand sides, i.e., the number of */
+/* >          columns of the matrices B and X. NRHS >=0. */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] A */
+/* > \verbatim */
+/* >          A is DOUBLE PRECISION array, dimension (LDA,N) */
+/* >          On entry, the M-by-N matrix A. */
+/* >          On exit, */
+/* >          A is overwritten by details of its QR or LQ */
+/* >          factorization as returned by DGEQR or DGELQ. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDA */
+/* > \verbatim */
+/* >          LDA is INTEGER */
+/* >          The leading dimension of the array A.  LDA >= max(1,M). */
+/* > \endverbatim */
+/* > */
+/* > \param[in,out] B */
+/* > \verbatim */
+/* >          B is DOUBLE PRECISION array, dimension (LDB,NRHS) */
+/* >          On entry, the matrix B of right hand side vectors, stored */
+/* >          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
+/* >          if TRANS = 'T'. */
+/* >          On exit, if INFO = 0, B is overwritten by the solution */
+/* >          vectors, stored columnwise: */
+/* >          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
+/* >          squares solution vectors. */
+/* >          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
+/* >          minimum norm solution vectors; */
+/* >          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
+/* >          least squares solution vectors. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LDB */
+/* > \verbatim */
+/* >          LDB is INTEGER */
+/* >          The leading dimension of the array B. LDB >= MAX(1,M,N). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] WORK */
+/* > \verbatim */
+/* >          (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
+/* >          On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */
+/* >          or optimal, if query was assumed) LWORK. */
+/* >          See LWORK for details. */
+/* > \endverbatim */
+/* > */
+/* > \param[in] LWORK */
+/* > \verbatim */
+/* >          LWORK is INTEGER */
+/* >          The dimension of the array WORK. */
+/* >          If LWORK = -1 or -2, then a workspace query is assumed. */
+/* >          If LWORK = -1, the routine calculates optimal size of WORK for the */
+/* >          optimal performance and returns this value in WORK(1). */
+/* >          If LWORK = -2, the routine calculates minimal size of WORK and */
+/* >          returns this value in WORK(1). */
+/* > \endverbatim */
+/* > */
+/* > \param[out] INFO */
+/* > \verbatim */
+/* >          INFO is INTEGER */
+/* >          = 0:  successful exit */
+/* >          < 0:  if INFO = -i, the i-th argument had an illegal value */
+/* >          > 0:  if INFO =  i, the i-th diagonal element of the */
+/* >                triangular factor of A is zero, so that A does not have */
+/* >                full rank; the least squares solution could not be */
+/* >                computed. */
+/* > \endverbatim */
+
+/*  Authors: */
+/*  ======== */
+
+/* > \author Univ. of Tennessee */
+/* > \author Univ. of California Berkeley */
+/* > \author Univ. of Colorado Denver */
+/* > \author NAG Ltd. */
+
+/* > \date June 2017 */
+
+/* > \ingroup doubleGEsolve */
+
+/*  ===================================================================== */
+/* Subroutine */ int dgetsls_(char *trans, integer *m, integer *n, integer *
+	nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, 
+	doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    doublereal anrm, bnrm;
+    logical tran;
+    integer brow, tszm, tszo, info2, i__, j, iascl, ibscl;
+    extern /* Subroutine */ int dgelq_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgeqr_(integer *, integer *, doublereal *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    integer *);
+    integer minmn, maxmn;
+    doublereal workq[1];
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
+    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
+	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
+	    integer *, integer *);
+    doublereal tq[5];
+    extern /* Subroutine */ int dgemlq_(char *, char *, integer *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal 
+	    *, doublereal *, doublereal *, integer *), xerbla_(char *,
+	     integer *, ftnlen), dgemqr_(char *, char *, integer *, integer *,
+	     integer *, doublereal *, integer *, doublereal *, integer *, 
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    integer scllen;
+    doublereal bignum, smlnum;
+    integer wsizem = 0, wsizeo = 0;
+    logical lquery;
+    extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, 
+	    integer *, doublereal *, integer *, doublereal *, integer *, 
+	    integer *);
+    integer lw1, lw2, mnk, lwm, lwo;
+/*     WSIZEM/WSIZEO start at zero: the argument-error exit stores WSIZEO */
+/*     in WORK(1) even when no workspace query has been run yet. */
+/*  -- LAPACK driver routine (version 3.7.1) -- */
+/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
+/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
+/*     June 2017 */
+/*  Summary: least squares or minimum norm solution with A or A**T, using */
+/*  DGEQR when M >= N and DGELQ when M < N, followed by DTRTRS.  On the */
+/*  exits through label L50, WORK(1) holds TSZO + LWO. */
+/*  ===================================================================== */
+
+
+/*     Test the input arguments. */
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1 * 1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1 * 1;
+    b -= b_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    minmn = f2cmin(*m,*n);
+    maxmn = f2cmax(*m,*n);
+    mnk = f2cmax(minmn,*nrhs);
+    tran = lsame_(trans, "T");
+
+    lquery = *lwork == -1 || *lwork == -2;
+    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 0) {
+	*info = -4;
+    } else if (*lda < f2cmax(1,*m)) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = f2cmax(1,*m);
+	if (*ldb < f2cmax(i__1,*n)) {
+	    *info = -8;
+	}
+    }
+
+    if (*info == 0) {
+
+/*     Determine the block size and minimum LWORK */
+
+	if (*m >= *n) {
+	    dgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
+	    tszo = (integer) tq[0];
+	    lwo = (integer) workq[0];
+	    dgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwo, i__2 = (integer) workq[0];
+	    lwo = f2cmax(i__1,i__2);
+	    dgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
+	    tszm = (integer) tq[0];
+	    lwm = (integer) workq[0];
+	    dgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwm, i__2 = (integer) workq[0];
+	    lwm = f2cmax(i__1,i__2);
+	    wsizeo = tszo + lwo;
+	    wsizem = tszm + lwm;
+	} else {
+	    dgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2);
+	    tszo = (integer) tq[0];
+	    lwo = (integer) workq[0];
+	    dgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwo, i__2 = (integer) workq[0];
+	    lwo = f2cmax(i__1,i__2);
+	    dgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2);
+	    tszm = (integer) tq[0];
+	    lwm = (integer) workq[0];
+	    dgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[
+		    b_offset], ldb, workq, &c_n1, &info2);
+/* Computing MAX */
+	    i__1 = lwm, i__2 = (integer) workq[0];
+	    lwm = f2cmax(i__1,i__2);
+	    wsizeo = tszo + lwo;
+	    wsizem = tszm + lwm;
+	}
+
+	if (*lwork < wsizem && ! lquery) {
+	    *info = -10;
+	}
+
+    }
+/*     On an argument error WSIZEO is 0 unless the queries above have run. */
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGETSLS", &i__1, (ftnlen)7);
+	work[1] = (doublereal) wsizeo;
+	return 0;
+    }
+    if (lquery) {
+	if (*lwork == -1) {
+	    work[1] = (doublereal) wsizeo;	/* double keeps sizes above 2**24 exact */
+	}
+	if (*lwork == -2) {
+	    work[1] = (doublereal) wsizem;	/* double keeps sizes above 2**24 exact */
+	}
+	return 0;
+    }
+    if (*lwork < wsizeo) {
+	lw1 = tszm;
+	lw2 = lwm;
+    } else {
+	lw1 = tszo;
+	lw2 = lwo;
+    }
+
+/*     Quick return if possible */
+
+/* Computing MIN */
+    i__1 = f2cmin(*m,*n);
+    if (f2cmin(i__1,*nrhs) == 0) {
+	i__1 = f2cmax(*m,*n);
+	dlaset_("FULL", &i__1, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = dlamch_("S") / dlamch_("P");
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Scale A, B if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
+		info);
+	iascl = 2;
+    } else if (anrm == 0.) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	dlaset_("F", &maxmn, nrhs, &c_b23, &c_b23, &b[b_offset], ldb);
+	goto L50;
+    }
+
+    brow = *m;
+    if (tran) {
+	brow = *n;
+    }
+    bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], 
+		ldb, info);
+	ibscl = 2;
+    }
+
+    if (*m >= *n) {
+
+/*        compute QR factorization of A */
+
+	dgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
+		info);
+	if (! tran) {
+
+/*           Least-Squares Problem min || A * X - B || */
+
+/*           B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */
+
+	    dgemqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+/*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */
+
+	    dtrtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
+		    ldb, info);
+	    if (*info > 0) {
+		return 0;
+	    }
+	    scllen = *n;
+	} else {
+
+/*           Overdetermined system of equations A**T * X = B */
+
+/*           B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */
+
+	    dtrtrs_("U", "T", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], 
+		    ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(N+1:M,1:NRHS) = ZERO */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = *n + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.;
+/* L10: */
+		}
+/* L20: */
+	    }
+
+/*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */
+
+	    dgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+	    scllen = *m;
+
+	}
+
+    } else {
+
+/*        Compute LQ factorization of A */
+
+	dgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, 
+		info);
+
+/*        workspace at least M, optimally M*NB. */
+
+	if (! tran) {
+
+/*           underdetermined system of equations A * X = B */
+
+/*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */
+
+	    dtrtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], 
+		    ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+/*           B(M+1:N,1:NRHS) = 0 */
+
+	    i__1 = *nrhs;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = *m + 1; i__ <= i__2; ++i__) {
+		    b[i__ + j * b_dim1] = 0.;
+/* L30: */
+		}
+/* L40: */
+	    }
+
+/*           B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */
+
+	    dgemlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+	    scllen = *n;
+
+	} else {
+
+/*           overdetermined system min || A**T * X - B || */
+
+/*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */
+
+	    dgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], &
+		    lw1, &b[b_offset], ldb, &work[1], &lw2, info);
+
+/*           workspace at least NRHS, optimally NRHS*NB */
+
+/*           B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */
+
+	    dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], 
+		    lda, &b[b_offset], ldb, info);
+
+	    if (*info > 0) {
+		return 0;
+	    }
+
+	    scllen = *m;
+
+	}
+
+    }
+
+/*     Undo scaling */
+
+    if (iascl == 1) {
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (iascl == 2) {
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+    if (ibscl == 1) {
+	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    } else if (ibscl == 2) {
+	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
+		, ldb, info);
+    }
+
+L50:
+    work[1] = (doublereal) (tszo + lwo);
+    return 0;
+
+/*     End of DGETSLS */
+
+} /* dgetsls_ */
+