From 5d23defa913fd80a15bf1ec74cb1deca2b845b5f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 22 Feb 2022 23:37:19 +0100 Subject: [PATCH] Add C versions as fallback --- lapack-netlib/SRC/sgbtrs.c | 684 ++++ lapack-netlib/SRC/sgebak.c | 674 ++++ lapack-netlib/SRC/sgebal.c | 838 +++++ lapack-netlib/SRC/sgebd2.c | 744 ++++ lapack-netlib/SRC/sgebrd.c | 783 ++++ lapack-netlib/SRC/sgecon.c | 656 ++++ lapack-netlib/SRC/sgeequ.c | 733 ++++ lapack-netlib/SRC/sgeequb.c | 753 ++++ lapack-netlib/SRC/sgees.c | 1001 +++++ lapack-netlib/SRC/sgeesx.c | 1120 ++++++ lapack-netlib/SRC/sgeev.c | 1029 ++++++ lapack-netlib/SRC/sgeevx.c | 1197 ++++++ lapack-netlib/SRC/sgehd2.c | 627 ++++ lapack-netlib/SRC/sgehrd.c | 782 ++++ lapack-netlib/SRC/sgejsv.c | 2678 ++++++++++++++ lapack-netlib/SRC/sgelq.c | 742 ++++ lapack-netlib/SRC/sgelq2.c | 597 +++ lapack-netlib/SRC/sgelqf.c | 698 ++++ lapack-netlib/SRC/sgelqt.c | 601 +++ lapack-netlib/SRC/sgelqt3.c | 657 ++++ lapack-netlib/SRC/sgels.c | 954 +++++ lapack-netlib/SRC/sgelss.c | 1308 +++++++ lapack-netlib/SRC/sgelsy.c | 939 +++++ lapack-netlib/SRC/sgemlq.c | 681 ++++ lapack-netlib/SRC/sgemlqt.c | 686 ++++ lapack-netlib/SRC/sgemqr.c | 683 ++++ lapack-netlib/SRC/sgemqrt.c | 706 ++++ lapack-netlib/SRC/sgeql2.c | 591 +++ lapack-netlib/SRC/sgeqlf.c | 709 ++++ lapack-netlib/SRC/sgeqp3.c | 789 ++++ lapack-netlib/SRC/sgeqr.c | 733 ++++ lapack-netlib/SRC/sgeqr2.c | 602 +++ lapack-netlib/SRC/sgeqr2p.c | 607 +++ lapack-netlib/SRC/sgeqrf.c | 700 ++++ lapack-netlib/SRC/sgeqrfp.c | 703 ++++ lapack-netlib/SRC/sgeqrt.c | 628 ++++ lapack-netlib/SRC/sgeqrt2.c | 645 ++++ lapack-netlib/SRC/sgeqrt3.c | 678 ++++ lapack-netlib/SRC/sgerfs.c | 879 +++++ lapack-netlib/SRC/sgerfsx.c | 1146 ++++++ lapack-netlib/SRC/sgerq2.c | 587 +++ lapack-netlib/SRC/sgerqf.c | 711 ++++ lapack-netlib/SRC/sgesc2.c | 604 +++ lapack-netlib/SRC/sgesdd.c | 2165 +++++++++++ lapack-netlib/SRC/sgesv.c | 576 +++ lapack-netlib/SRC/sgesvd.c | 4471 +++++++++++++++++++++++ lapack-netlib/SRC/sgesvdq.c | 2124 +++++++++++ lapack-netlib/SRC/sgesvdx.c | 1339 +++++++ lapack-netlib/SRC/sgesvj.c | 2210 +++++++++++ lapack-netlib/SRC/sgesvx.c | 1063 ++++++ lapack-netlib/SRC/sgesvxx.c | 1210 ++++++ lapack-netlib/SRC/sgetc2.c | 645 ++++ lapack-netlib/SRC/sgetf2.c | 618 ++++ lapack-netlib/SRC/sgetrf.c | 643 ++++ lapack-netlib/SRC/sgetrf2.c | 681 ++++ lapack-netlib/SRC/sgetri.c | 690 ++++ lapack-netlib/SRC/sgetrs.c | 619 ++++ lapack-netlib/SRC/sgetsls.c | 929 +++++ lapack-netlib/SRC/sgetsqrhrt.c | 765 ++++ lapack-netlib/SRC/sggbak.c | 720 ++++ lapack-netlib/SRC/sggbal.c | 1069 ++++++ lapack-netlib/SRC/sgges.c | 1162 ++++++ lapack-netlib/SRC/sgges3.c | 1163 ++++++ lapack-netlib/SRC/sggesx.c | 1308 +++++++ lapack-netlib/SRC/sggev.c | 1099 ++++++ lapack-netlib/SRC/sggev3.c | 1111 ++++++ lapack-netlib/SRC/sggevx.c | 1385 +++++++ lapack-netlib/SRC/sggglm.c | 787 ++++ lapack-netlib/SRC/sgghd3.c | 1452 ++++++++ lapack-netlib/SRC/sgghrd.c | 784 ++++ lapack-netlib/SRC/sgglse.c | 786 ++++ lapack-netlib/SRC/sggqrf.c | 718 ++++ lapack-netlib/SRC/sggrqf.c | 719 ++++ lapack-netlib/SRC/sggsvd3.c | 936 +++++ lapack-netlib/SRC/sggsvp3.c | 1056 ++++++ lapack-netlib/SRC/sgsvj0.c | 1592 ++++++++ lapack-netlib/SRC/sgsvj1.c | 1234 +++++++ lapack-netlib/SRC/sgtcon.c | 649 ++++ lapack-netlib/SRC/sgtrfs.c | 913 +++++ lapack-netlib/SRC/sgtsv.c | 745 ++++ lapack-netlib/SRC/sgtsvx.c | 828 +++++ lapack-netlib/SRC/sgttrf.c | 632 ++++ lapack-netlib/SRC/sgttrs.c | 632 ++++ lapack-netlib/SRC/sgtts2.c | 706 ++++ lapack-netlib/SRC/shgeqz.c | 1976 ++++++++++ lapack-netlib/SRC/shsein.c | 968 +++++ lapack-netlib/SRC/shseqr.c | 941 +++++ lapack-netlib/SRC/sisnan.c | 469 +++ lapack-netlib/SRC/sla_gbamv.c | 815 +++++ lapack-netlib/SRC/sla_gbrcond.c | 791 ++++ lapack-netlib/SRC/sla_gbrfsx_extended.c | 1135 ++++++ lapack-netlib/SRC/sla_gbrpvgrw.c | 569 +++ lapack-netlib/SRC/sla_geamv.c | 779 ++++ lapack-netlib/SRC/sla_gercond.c | 735 ++++ lapack-netlib/SRC/sla_gerfsx_extended.c | 1119 ++++++ lapack-netlib/SRC/sla_gerpvgrw.c | 542 +++ 96 files changed, 90636 insertions(+) create mode 100644 lapack-netlib/SRC/sgbtrs.c create mode 100644 lapack-netlib/SRC/sgebak.c create mode 100644 lapack-netlib/SRC/sgebal.c create mode 100644 lapack-netlib/SRC/sgebd2.c create mode 100644 lapack-netlib/SRC/sgebrd.c create mode 100644 lapack-netlib/SRC/sgecon.c create mode 100644 lapack-netlib/SRC/sgeequ.c create mode 100644 lapack-netlib/SRC/sgeequb.c create mode 100644 lapack-netlib/SRC/sgees.c create mode 100644 lapack-netlib/SRC/sgeesx.c create mode 100644 lapack-netlib/SRC/sgeev.c create mode 100644 lapack-netlib/SRC/sgeevx.c create mode 100644 lapack-netlib/SRC/sgehd2.c create mode 100644 lapack-netlib/SRC/sgehrd.c create mode 100644 lapack-netlib/SRC/sgejsv.c create mode 100644 lapack-netlib/SRC/sgelq.c create mode 100644 lapack-netlib/SRC/sgelq2.c create mode 100644 lapack-netlib/SRC/sgelqf.c create mode 100644 lapack-netlib/SRC/sgelqt.c create mode 100644 lapack-netlib/SRC/sgelqt3.c create mode 100644 lapack-netlib/SRC/sgels.c create mode 100644 lapack-netlib/SRC/sgelss.c create mode 100644 lapack-netlib/SRC/sgelsy.c create mode 100644 lapack-netlib/SRC/sgemlq.c create mode 100644 lapack-netlib/SRC/sgemlqt.c create mode 100644 lapack-netlib/SRC/sgemqr.c create mode 100644 lapack-netlib/SRC/sgemqrt.c create mode 100644 lapack-netlib/SRC/sgeql2.c create mode 100644 lapack-netlib/SRC/sgeqlf.c create mode 100644 lapack-netlib/SRC/sgeqp3.c create mode 100644 lapack-netlib/SRC/sgeqr.c create mode 100644 lapack-netlib/SRC/sgeqr2.c create mode 100644 lapack-netlib/SRC/sgeqr2p.c create mode 100644 lapack-netlib/SRC/sgeqrf.c create mode 100644 lapack-netlib/SRC/sgeqrfp.c create mode 100644 lapack-netlib/SRC/sgeqrt.c create mode 100644 lapack-netlib/SRC/sgeqrt2.c create mode 100644 lapack-netlib/SRC/sgeqrt3.c create mode 100644 lapack-netlib/SRC/sgerfs.c create mode 100644 lapack-netlib/SRC/sgerfsx.c create mode 100644 lapack-netlib/SRC/sgerq2.c create mode 100644 lapack-netlib/SRC/sgerqf.c create mode 100644 lapack-netlib/SRC/sgesc2.c create mode 100644 lapack-netlib/SRC/sgesdd.c create mode 100644 lapack-netlib/SRC/sgesv.c create mode 100644 lapack-netlib/SRC/sgesvd.c create mode 100644 lapack-netlib/SRC/sgesvdq.c create mode 100644 lapack-netlib/SRC/sgesvdx.c create mode 100644 lapack-netlib/SRC/sgesvj.c create mode 100644 lapack-netlib/SRC/sgesvx.c create mode 100644 lapack-netlib/SRC/sgesvxx.c create mode 100644 lapack-netlib/SRC/sgetc2.c create mode 100644 lapack-netlib/SRC/sgetf2.c create mode 100644 lapack-netlib/SRC/sgetrf.c create mode 100644 lapack-netlib/SRC/sgetrf2.c create mode 100644 lapack-netlib/SRC/sgetri.c create mode 100644 lapack-netlib/SRC/sgetrs.c create mode 100644 lapack-netlib/SRC/sgetsls.c create mode 100644 lapack-netlib/SRC/sgetsqrhrt.c create mode 100644 lapack-netlib/SRC/sggbak.c create mode 100644 lapack-netlib/SRC/sggbal.c create mode 100644 lapack-netlib/SRC/sgges.c create mode 100644 lapack-netlib/SRC/sgges3.c create mode 100644 lapack-netlib/SRC/sggesx.c create mode 100644 lapack-netlib/SRC/sggev.c create mode 100644 lapack-netlib/SRC/sggev3.c create mode 100644 lapack-netlib/SRC/sggevx.c create mode 100644 lapack-netlib/SRC/sggglm.c create mode 100644 lapack-netlib/SRC/sgghd3.c create mode 100644 lapack-netlib/SRC/sgghrd.c create mode 100644 lapack-netlib/SRC/sgglse.c create mode 100644 lapack-netlib/SRC/sggqrf.c create mode 100644 lapack-netlib/SRC/sggrqf.c create mode 100644 lapack-netlib/SRC/sggsvd3.c create mode 100644 lapack-netlib/SRC/sggsvp3.c create mode 100644 lapack-netlib/SRC/sgsvj0.c create mode 100644 lapack-netlib/SRC/sgsvj1.c create mode 100644 lapack-netlib/SRC/sgtcon.c create mode 100644 lapack-netlib/SRC/sgtrfs.c create mode 100644 lapack-netlib/SRC/sgtsv.c create mode 100644 lapack-netlib/SRC/sgtsvx.c create mode 100644 lapack-netlib/SRC/sgttrf.c create mode 100644 lapack-netlib/SRC/sgttrs.c create mode 100644 lapack-netlib/SRC/sgtts2.c create mode 100644 lapack-netlib/SRC/shgeqz.c create mode 100644 lapack-netlib/SRC/shsein.c create mode 100644 lapack-netlib/SRC/shseqr.c create mode 100644 lapack-netlib/SRC/sisnan.c create mode 100644 lapack-netlib/SRC/sla_gbamv.c create mode 100644 lapack-netlib/SRC/sla_gbrcond.c create mode 100644 lapack-netlib/SRC/sla_gbrfsx_extended.c create mode 100644 lapack-netlib/SRC/sla_gbrpvgrw.c create mode 100644 lapack-netlib/SRC/sla_geamv.c create mode 100644 lapack-netlib/SRC/sla_gercond.c create mode 100644 lapack-netlib/SRC/sla_gerfsx_extended.c create mode 100644 lapack-netlib/SRC/sla_gerpvgrw.c diff --git a/lapack-netlib/SRC/sgbtrs.c b/lapack-netlib/SRC/sgbtrs.c new file mode 100644 index 000000000..e67ecf8cf --- /dev/null +++ b/lapack-netlib/SRC/sgbtrs.c @@ -0,0 +1,684 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGBTRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGBTRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* REAL AB( LDAB, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGBTRS solves a system of linear equations */ +/* > A * X = B or A**T * X = B */ +/* > with a general band matrix A using the LU factorization computed */ +/* > by SGBTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations. */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T* X = B (Transpose) */ +/* > = 'C': A**T* X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is REAL array, dimension (LDAB,N) */ +/* > Details of the LU factorization of the band matrix A, as */ +/* > computed by SGBTRF. U is stored as an upper triangular band */ +/* > matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */ +/* > the multipliers used during the factorization are stored in */ +/* > rows KL+KU+2 to 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= N, row i of the matrix was */ +/* > interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgbtrs_(char *trans, integer *n, integer *kl, integer * + ku, integer *nrhs, real *ab, integer *ldab, integer *ipiv, real *b, + integer *ldb, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + integer i__, j, l; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + logical lnoti; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), stbsv_(char *, char *, char *, integer *, integer *, + real *, integer *, real *, integer *); + integer kd, lm; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0) { + *info = -3; + } else if (*ku < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*ldab < (*kl << 1) + *ku + 1) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGBTRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + kd = *ku + *kl + 1; + lnoti = *kl > 0; + + if (notran) { + +/* Solve A*X = B. */ + +/* Solve L*X = B, overwriting B with X. */ + +/* L is represented as a product of permutations and unit lower */ +/* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), */ +/* where each transformation L(i) is a rank-one modification of */ +/* the identity matrix. */ + + if (lnoti) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__2 = *kl, i__3 = *n - j; + lm = f2cmin(i__2,i__3); + l = ipiv[j]; + if (l != j) { + sswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); + } + sger_(&lm, nrhs, &c_b7, &ab[kd + 1 + j * ab_dim1], &c__1, &b[ + j + b_dim1], ldb, &b[j + 1 + b_dim1], ldb); +/* L10: */ + } + } + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve U*X = B, overwriting B with X. */ + + i__2 = *kl + *ku; + stbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[ + ab_offset], ldab, &b[i__ * b_dim1 + 1], &c__1); +/* L20: */ + } + + } else { + +/* Solve A**T*X = B. */ + + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Solve U**T*X = B, overwriting B with X. */ + + i__2 = *kl + *ku; + stbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset], + ldab, &b[i__ * b_dim1 + 1], &c__1); +/* L30: */ + } + +/* Solve L**T*X = B, overwriting B with X. */ + + if (lnoti) { + for (j = *n - 1; j >= 1; --j) { +/* Computing MIN */ + i__1 = *kl, i__2 = *n - j; + lm = f2cmin(i__1,i__2); + sgemv_("Transpose", &lm, nrhs, &c_b7, &b[j + 1 + b_dim1], ldb, + &ab[kd + 1 + j * ab_dim1], &c__1, &c_b23, &b[j + + b_dim1], ldb); + l = ipiv[j]; + if (l != j) { + sswap_(nrhs, &b[l + b_dim1], ldb, &b[j + b_dim1], ldb); + } +/* L40: */ + } + } + } + return 0; + +/* End of SGBTRS */ + +} /* sgbtrs_ */ + diff --git a/lapack-netlib/SRC/sgebak.c b/lapack-netlib/SRC/sgebak.c new file mode 100644 index 000000000..db6ec051c --- /dev/null +++ b/lapack-netlib/SRC/sgebak.c @@ -0,0 +1,674 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEBAK */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEBAK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, */ +/* INFO ) */ + +/* CHARACTER JOB, SIDE */ +/* INTEGER IHI, ILO, INFO, LDV, M, N */ +/* REAL V( LDV, * ), SCALE( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEBAK forms the right or left eigenvectors of a real general matrix */ +/* > by backward transformation on the computed eigenvectors of the */ +/* > balanced matrix output by SGEBAL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the type of backward transformation required: */ +/* > = 'N': do nothing, return immediately; */ +/* > = 'P': do backward transformation for permutation only; */ +/* > = 'S': do backward transformation for scaling only; */ +/* > = 'B': do backward transformations for both permutation and */ +/* > scaling. */ +/* > JOB must be the same as the argument JOB supplied to SGEBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'R': V contains right eigenvectors; */ +/* > = 'L': V contains left eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrix V. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > The integers ILO and IHI determined by SGEBAL. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL array, dimension (N) */ +/* > Details of the permutation and scaling factors, as returned */ +/* > by SGEBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix V. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,M) */ +/* > On entry, the matrix of right or left eigenvectors to be */ +/* > transformed, as returned by SHSEIN or STREVC. */ +/* > On exit, V is overwritten by the transformed eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer + *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + integer i__, k; + real s; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + logical leftv; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *); + integer ii; + extern /* Subroutine */ int xerbla_(char *, integer *,ftnlen); + logical rightv; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode and Test the input parameters */ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) { + *info = -4; + } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < f2cmax(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAK", &i__1,(ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1.f / scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* Backward permutation */ + +/* For I = ILO-1 step -1 until 1, */ +/* IHI+1 step 1 until N do -- */ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L40; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L50; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of SGEBAK */ + +} /* sgebak_ */ + diff --git a/lapack-netlib/SRC/sgebal.c b/lapack-netlib/SRC/sgebal.c new file mode 100644 index 000000000..672a0b985 --- /dev/null +++ b/lapack-netlib/SRC/sgebal.c @@ -0,0 +1,838 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEBAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEBAL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) */ + +/* CHARACTER JOB */ +/* INTEGER IHI, ILO, INFO, LDA, N */ +/* REAL A( LDA, * ), SCALE( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEBAL balances a general real matrix A. This involves, first, */ +/* > permuting A by a similarity transformation to isolate eigenvalues */ +/* > in the first 1 to ILO-1 and last IHI+1 to N elements on the */ +/* > diagonal; and second, applying a diagonal similarity transformation */ +/* > to rows and columns ILO to IHI to make the rows and columns as */ +/* > close in norm as possible. Both steps are optional. */ +/* > */ +/* > Balancing may reduce the 1-norm of the matrix, and improve the */ +/* > accuracy of the computed eigenvalues and/or eigenvectors. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the operations to be performed on A: */ +/* > = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 */ +/* > for i = 1,...,N; */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the input matrix A. */ +/* > On exit, A is overwritten by the balanced matrix. */ +/* > If JOB = 'N', A is not referenced. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are set to integers such that on exit */ +/* > A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. */ +/* > If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied to */ +/* > A. If P(j) is the index of the row and column interchanged */ +/* > with row and column j and D(j) is the scaling factor */ +/* > applied to row and column j, then */ +/* > SCALE(j) = P(j) for j = 1,...,ILO-1 */ +/* > = D(j) for j = ILO,...,IHI */ +/* > = P(j) for j = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The permutations consist of row and column interchanges which put */ +/* > the matrix in the form */ +/* > */ +/* > ( T1 X Y ) */ +/* > P A P = ( 0 B Z ) */ +/* > ( 0 0 T2 ) */ +/* > */ +/* > where T1 and T2 are upper triangular matrices whose eigenvalues lie */ +/* > along the diagonal. The column indices ILO and IHI mark the starting */ +/* > and ending columns of the submatrix B. Balancing consists of applying */ +/* > a diagonal similarity transformation inv(D) * B * D to make the */ +/* > 1-norms of each row of B and its corresponding column nearly equal. */ +/* > The output matrix is */ +/* > */ +/* > ( T1 X*D Y ) */ +/* > ( 0 inv(D)*B*D inv(D)*Z ). */ +/* > ( 0 0 T2 ) */ +/* > */ +/* > Information about the permutations P and the diagonal matrix D is */ +/* > returned in the vector SCALE. */ +/* > */ +/* > This subroutine is based on the EISPACK routine BALANC. */ +/* > */ +/* > Modified by Tzu-Yi Chen, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + integer iexc; + extern real snrm2_(integer *, real *, integer *); + real c__, f, g; + integer i__, j, k, l, m; + real r__, s; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sswap_(integer *, real *, integer *, real *, integer *); + real sfmin1, sfmin2, sfmax1, sfmax2, ca, ra; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *,ftnlen); + extern integer isamax_(integer *, real *, integer *); + extern logical sisnan_(real *); + logical noconv; + integer ica, ira; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAL", &i__1,(ftnlen)6); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (real) j; + if (j == m) { + goto L30; + } + + sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.f) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.f) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* Balance the submatrix in rows K to L. */ + +/* Iterative loop for norm reduction */ + + sfmin1 = slamch_("S") / slamch_("P"); + sfmax1 = 1.f / sfmin1; + sfmin2 = sfmin1 * 2.f; + sfmax2 = 1.f / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + + i__2 = l - k + 1; + c__ = snrm2_(&i__2, &a[k + i__ * a_dim1], &c__1); + i__2 = l - k + 1; + r__ = snrm2_(&i__2, &a[i__ + k * a_dim1], lda); + ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (r__1 = a[ica + i__ * a_dim1], abs(r__1)); + i__2 = *n - k + 1; + ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], abs(r__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0.f || r__ == 0.f) { + goto L200; + } + g = r__ / 2.f; + f = 1.f; + s = c__ + r__; +L160: +/* Computing MAX */ + r__1 = f2cmax(f,c__); +/* Computing MIN */ + r__2 = f2cmin(r__,g); + if (c__ >= g || f2cmax(r__1,ca) >= sfmax2 || f2cmin(r__2,ra) <= sfmin2) { + goto L170; + } + f *= 2.f; + c__ *= 2.f; + ca *= 2.f; + r__ /= 2.f; + g /= 2.f; + ra /= 2.f; + goto L160; + +L170: + g = c__ / 2.f; +L180: +/* Computing MIN */ + r__1 = f2cmin(f,c__), r__1 = f2cmin(r__1,g); + if (g < r__ || f2cmax(r__,ra) >= sfmax2 || f2cmin(r__1,ca) <= sfmin2) { + goto L190; + } + r__1 = c__ + f + ca + r__ + g + ra; + if (sisnan_(&r__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("SGEBAL", &i__2, (ftnlen)6); + return 0; + } + f /= 2.f; + c__ /= 2.f; + g /= 2.f; + ca /= 2.f; + r__ *= 2.f; + ra *= 2.f; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95f) { + goto L200; + } + if (f < 1.f && scale[i__] < 1.f) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1.f && scale[i__] > 1.f) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1.f / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of SGEBAL */ + +} /* sgebal_ */ + diff --git a/lapack-netlib/SRC/sgebd2.c b/lapack-netlib/SRC/sgebd2.c new file mode 100644 index 000000000..0e774257e --- /dev/null +++ b/lapack-netlib/SRC/sgebd2.c @@ -0,0 +1,744 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEBD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), */ +/* $ TAUQ( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEBD2 reduces a real general m by n matrix A to upper or lower */ +/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ +/* > */ +/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows in the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns in the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the m by n general matrix to be reduced. */ +/* > On exit, */ +/* > if m >= n, the diagonal and the first superdiagonal are */ +/* > overwritten with the upper bidiagonal matrix B; the */ +/* > elements below the diagonal, with the array TAUQ, represent */ +/* > the orthogonal matrix Q as a product of elementary */ +/* > reflectors, and the elements above the first superdiagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors; */ +/* > if m < n, the diagonal and the first subdiagonal are */ +/* > overwritten with the lower bidiagonal matrix B; the */ +/* > elements below the first subdiagonal, with the array TAUQ, */ +/* > represent the orthogonal matrix Q as a product of */ +/* > elementary reflectors, and the elements above the diagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (f2cmin(M,N)) */ +/* > The diagonal elements of the bidiagonal matrix B: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is REAL array, dimension (f2cmin(M,N)-1) */ +/* > The off-diagonal elements of the bidiagonal matrix B: */ +/* > if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* > if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUQ */ +/* > \verbatim */ +/* > TAUQ is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUP */ +/* > \verbatim */ +/* > TAUP is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix P. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (f2cmax(M,N)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrices Q and P are represented as products of elementary */ +/* > reflectors: */ +/* > */ +/* > If m >= n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* > u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > If m < n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* > u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples: */ +/* > */ +/* > m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ +/* > */ +/* > ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* > ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* > ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* > ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* > ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* > ( v1 v2 v3 v4 v5 ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of B, vi */ +/* > denotes an element of the vector defining H(i), and ui an element of */ +/* > the vector defining G(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *, ftnlen), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBD2", &i__1, (ftnlen)6); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] + ); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* Generate elementary reflector G(i) to annihilate */ +/* A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + f2cmin( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.f; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.f; + } +/* L20: */ + } + } + return 0; + +/* End of SGEBD2 */ + +} /* sgebd2_ */ + diff --git a/lapack-netlib/SRC/sgebrd.c b/lapack-netlib/SRC/sgebrd.c new file mode 100644 index 000000000..8c89c5136 --- /dev/null +++ b/lapack-netlib/SRC/sgebrd.c @@ -0,0 +1,783 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEBRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEBRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), */ +/* $ TAUQ( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEBRD reduces a general real M-by-N matrix A to upper or lower */ +/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ +/* > */ +/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows in the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns in the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N general matrix to be reduced. */ +/* > On exit, */ +/* > if m >= n, the diagonal and the first superdiagonal are */ +/* > overwritten with the upper bidiagonal matrix B; the */ +/* > elements below the diagonal, with the array TAUQ, represent */ +/* > the orthogonal matrix Q as a product of elementary */ +/* > reflectors, and the elements above the first superdiagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors; */ +/* > if m < n, the diagonal and the first subdiagonal are */ +/* > overwritten with the lower bidiagonal matrix B; the */ +/* > elements below the first subdiagonal, with the array TAUQ, */ +/* > represent the orthogonal matrix Q as a product of */ +/* > elementary reflectors, and the elements above the diagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (f2cmin(M,N)) */ +/* > The diagonal elements of the bidiagonal matrix B: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is REAL array, dimension (f2cmin(M,N)-1) */ +/* > The off-diagonal elements of the bidiagonal matrix B: */ +/* > if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* > if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUQ */ +/* > \verbatim */ +/* > TAUQ is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUP */ +/* > \verbatim */ +/* > TAUP is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix P. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,M,N). */ +/* > For optimum performance LWORK >= (M+N)*NB, where NB */ +/* > is the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrices Q and P are represented as products of elementary */ +/* > reflectors: */ +/* > */ +/* > If m >= n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* > u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > If m < n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* > u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples: */ +/* > */ +/* > m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ +/* > */ +/* > ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* > ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* > ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* > ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* > ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* > ( v1 v2 v3 v4 v5 ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of B, vi */ +/* > denotes an element of the vector defining H(i), and ui an element of */ +/* > the vector defining G(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + integer minmn; + extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *); + integer nb, nx; + extern /* Subroutine */ int slabrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, real *, real *, real *, integer *, + real *, integer *); + integer ws; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwrkx, ldwrky, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*lwork < f2cmax(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = f2cmin(*m,*n); + if (minmn == 0) { + work[1] = 1.f; + return 0; + } + + ws = f2cmax(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* Set the crossover point NX. */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (*m + *n) * nb; + if (*lwork < ws) { + +/* Not enough work space for the optimal NB, consider using */ +/* a smaller block size. */ + + nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* Reduce rows and columns i:i+nb-1 to bidiagonal form and return */ +/* the matrices X and Y which are needed to update the unreduced */ +/* part of the matrix */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */ +/* of the form A := A - V*Y**T - X*U**T */ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b21, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = (real) ws; + return 0; + +/* End of SGEBRD */ + +} /* sgebrd_ */ + diff --git a/lapack-netlib/SRC/sgecon.c b/lapack-netlib/SRC/sgecon.c new file mode 100644 index 000000000..8d5356683 --- /dev/null +++ b/lapack-netlib/SRC/sgecon.c @@ -0,0 +1,656 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGECON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGECON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, */ +/* INFO ) */ + +/* CHARACTER NORM */ +/* INTEGER INFO, LDA, N */ +/* REAL ANORM, RCOND */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGECON estimates the reciprocal of the condition number of a general */ +/* > real matrix A, in either the 1-norm or the infinity-norm, using */ +/* > the LU factorization computed by SGETRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as */ +/* > RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies whether the 1-norm condition number or the */ +/* > infinity-norm condition number is required: */ +/* > = '1' or 'O': 1-norm; */ +/* > = 'I': Infinity-norm. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by SGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is REAL */ +/* > If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* > If NORM = 'I', the infinity-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(norm(A) * norm(inv(A))). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgecon_(char *norm, integer *n, real *a, integer *lda, + real *anorm, real *rcond, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + real r__1; + + /* Local variables */ + integer kase, kase1; + real scale; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *), + slacn2_(integer *, real *, real *, integer *, real *, integer *, + integer *); + real sl; + integer ix; + extern real slamch_(char *); + real su; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer isamax_(integer *, real *, integer *); + real ainvnm; + logical onenrm; + char normin[1]; + extern /* Subroutine */ int slatrs_(char *, char *, char *, char *, + integer *, real *, integer *, real *, real *, real *, integer *); + real smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); + if (! onenrm && ! lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*anorm < 0.f) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGECON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.f; + if (*n == 0) { + *rcond = 1.f; + return 0; + } else if (*anorm == 0.f) { + return 0; + } + + smlnum = slamch_("Safe minimum"); + +/* Estimate the norm of inv(A). */ + + ainvnm = 0.f; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L10: + slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(L). */ + + slatrs_("Lower", "No transpose", "Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info); + +/* Multiply by inv(U). */ + + slatrs_("Upper", "No transpose", "Non-unit", normin, n, &a[ + a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info); + } else { + +/* Multiply by inv(U**T). */ + + slatrs_("Upper", "Transpose", "Non-unit", normin, n, &a[a_offset], + lda, &work[1], &su, &work[*n * 3 + 1], info); + +/* Multiply by inv(L**T). */ + + slatrs_("Lower", "Transpose", "Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info); + } + +/* Divide X by 1/(SL*SU) if doing so will not cause overflow. */ + + scale = sl * su; + *(unsigned char *)normin = 'Y'; + if (scale != 1.f) { + ix = isamax_(n, &work[1], &c__1); + if (scale < (r__1 = work[ix], abs(r__1)) * smlnum || scale == 0.f) + { + goto L20; + } + srscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + *rcond = 1.f / ainvnm / *anorm; + } + +L20: + return 0; + +/* End of SGECON */ + +} /* sgecon_ */ + diff --git a/lapack-netlib/SRC/sgeequ.c b/lapack-netlib/SRC/sgeequ.c new file mode 100644 index 000000000..165450d55 --- /dev/null +++ b/lapack-netlib/SRC/sgeequ.c @@ -0,0 +1,733 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEEQU */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEEQU + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL AMAX, COLCND, ROWCND */ +/* REAL A( LDA, * ), C( * ), R( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEEQU computes row and column scalings intended to equilibrate an */ +/* > M-by-N matrix A and reduce its condition number. R returns the row */ +/* > scale factors and C the column scale factors, chosen to try to make */ +/* > the largest element in each row and column of the matrix B with */ +/* > elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. */ +/* > */ +/* > R(i) and C(j) are restricted to be between SMLNUM = smallest safe */ +/* > number and BIGNUM = largest safe number. Use of these scaling */ +/* > factors is not guaranteed to reduce the condition number of A but */ +/* > works well in practice. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > The M-by-N matrix whose equilibration factors are */ +/* > to be computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] R */ +/* > \verbatim */ +/* > R is REAL array, dimension (M) */ +/* > If INFO = 0 or INFO > M, R contains the row scale factors */ +/* > for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > If INFO = 0, C contains the column scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ROWCND */ +/* > \verbatim */ +/* > ROWCND is REAL */ +/* > If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* > smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* > AMAX is neither too large nor too small, it is not worth */ +/* > scaling by R. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] COLCND */ +/* > \verbatim */ +/* > COLCND is REAL */ +/* > If INFO = 0, COLCND contains the ratio of the smallest */ +/* > C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* > worth scaling by C. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is REAL */ +/* > Absolute value of largest matrix element. If AMAX is very */ +/* > close to overflow or very close to underflow, the matrix */ +/* > should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= M: the i-th row of A is exactly zero */ +/* > > M: the (i-M)-th column of A is exactly zero */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgeequ_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1, r__2, r__3; + + /* Local variables */ + integer i__, j; + real rcmin, rcmax; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum, smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEEQU", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.f; + *colcnd = 1.f; + *amax = 0.f; + return 0; + } + +/* Get machine constants. */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.f; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = r__[i__], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1)); + r__[i__] = f2cmax(r__2,r__3); +/* L20: */ + } +/* L30: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = rcmax, r__2 = r__[i__]; + rcmax = f2cmax(r__1,r__2); +/* Computing MIN */ + r__1 = rcmin, r__2 = r__[i__]; + rcmin = f2cmin(r__1,r__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.f) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.f) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + r__2 = r__[i__]; + r__1 = f2cmax(r__2,smlnum); + r__[i__] = 1.f / f2cmin(r__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)) */ + + *rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.f; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = c__[j], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1)) * + r__[i__]; + c__[j] = f2cmax(r__2,r__3); +/* L80: */ + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + r__1 = rcmin, r__2 = c__[j]; + rcmin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = rcmax, r__2 = c__[j]; + rcmax = f2cmax(r__1,r__2); +/* L100: */ + } + + if (rcmin == 0.f) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.f) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + r__2 = c__[j]; + r__1 = f2cmax(r__2,smlnum); + c__[j] = 1.f / f2cmin(r__1,bignum); +/* L120: */ + } + +/* Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)) */ + + *colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + + return 0; + +/* End of SGEEQU */ + +} /* sgeequ_ */ + diff --git a/lapack-netlib/SRC/sgeequb.c b/lapack-netlib/SRC/sgeequb.c new file mode 100644 index 000000000..fb6055bbf --- /dev/null +++ b/lapack-netlib/SRC/sgeequb.c @@ -0,0 +1,753 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEEQUB */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEEQUB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL AMAX, COLCND, ROWCND */ +/* REAL A( LDA, * ), C( * ), R( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEEQUB computes row and column scalings intended to equilibrate an */ +/* > M-by-N matrix A and reduce its condition number. R returns the row */ +/* > scale factors and C the column scale factors, chosen to try to make */ +/* > the largest element in each row and column of the matrix B with */ +/* > elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most */ +/* > the radix. */ +/* > */ +/* > R(i) and C(j) are restricted to be a power of the radix between */ +/* > SMLNUM = smallest safe number and BIGNUM = largest safe number. Use */ +/* > of these scaling factors is not guaranteed to reduce the condition */ +/* > number of A but works well in practice. */ +/* > */ +/* > This routine differs from SGEEQU by restricting the scaling factors */ +/* > to a power of the radix. Barring over- and underflow, scaling by */ +/* > these factors introduces no additional rounding errors. However, the */ +/* > scaled entries' magnitudes are no longer approximately 1 but lie */ +/* > between sqrt(radix) and 1/sqrt(radix). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > The M-by-N matrix whose equilibration factors are */ +/* > to be computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] R */ +/* > \verbatim */ +/* > R is REAL array, dimension (M) */ +/* > If INFO = 0 or INFO > M, R contains the row scale factors */ +/* > for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > If INFO = 0, C contains the column scale factors for A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ROWCND */ +/* > \verbatim */ +/* > ROWCND is REAL */ +/* > If INFO = 0 or INFO > M, ROWCND contains the ratio of the */ +/* > smallest R(i) to the largest R(i). If ROWCND >= 0.1 and */ +/* > AMAX is neither too large nor too small, it is not worth */ +/* > scaling by R. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] COLCND */ +/* > \verbatim */ +/* > COLCND is REAL */ +/* > If INFO = 0, COLCND contains the ratio of the smallest */ +/* > C(i) to the largest C(i). If COLCND >= 0.1, it is not */ +/* > worth scaling by C. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] AMAX */ +/* > \verbatim */ +/* > AMAX is REAL */ +/* > Absolute value of largest matrix element. If AMAX is very */ +/* > close to overflow or very close to underflow, the matrix */ +/* > should be scaled. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= M: the i-th row of A is exactly zero */ +/* > > M: the (i-M)-th column of A is exactly zero */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgeequb_(integer *m, integer *n, real *a, integer *lda, + real *r__, real *c__, real *rowcnd, real *colcnd, real *amax, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1, r__2, r__3; + + /* Local variables */ + integer i__, j; + real radix, rcmin, rcmax; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum, logrdx, smlnum; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --r__; + --c__; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEEQUB", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rowcnd = 1.f; + *colcnd = 1.f; + *amax = 0.f; + return 0; + } + +/* Get machine constants. Assume SMLNUM is a power of the radix. */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + radix = slamch_("B"); + logrdx = log(radix); + +/* Compute row scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + r__[i__] = 0.f; +/* L10: */ + } + +/* Find the maximum element in each row. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = r__[i__], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1)); + r__[i__] = f2cmax(r__2,r__3); +/* L20: */ + } +/* L30: */ + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] > 0.f) { + i__2 = (integer) (log(r__[i__]) / logrdx); + r__[i__] = pow_ri(&radix, &i__2); + } + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = rcmax, r__2 = r__[i__]; + rcmax = f2cmax(r__1,r__2); +/* Computing MIN */ + r__1 = rcmin, r__2 = r__[i__]; + rcmin = f2cmin(r__1,r__2); +/* L40: */ + } + *amax = rcmax; + + if (rcmin == 0.f) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (r__[i__] == 0.f) { + *info = i__; + return 0; + } +/* L50: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MIN */ +/* Computing MAX */ + r__2 = r__[i__]; + r__1 = f2cmax(r__2,smlnum); + r__[i__] = 1.f / f2cmin(r__1,bignum); +/* L60: */ + } + +/* Compute ROWCND = f2cmin(R(I)) / f2cmax(R(I)). */ + + *rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + +/* Compute column scale factors */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 0.f; +/* L70: */ + } + +/* Find the maximum element in each column, */ +/* assuming the row scaling computed above. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = c__[j], r__3 = (r__1 = a[i__ + j * a_dim1], abs(r__1)) * + r__[i__]; + c__[j] = f2cmax(r__2,r__3); +/* L80: */ + } + if (c__[j] > 0.f) { + i__2 = (integer) (log(c__[j]) / logrdx); + c__[j] = pow_ri(&radix, &i__2); + } +/* L90: */ + } + +/* Find the maximum and minimum scale factors. */ + + rcmin = bignum; + rcmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + r__1 = rcmin, r__2 = c__[j]; + rcmin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = rcmax, r__2 = c__[j]; + rcmax = f2cmax(r__1,r__2); +/* L100: */ + } + + if (rcmin == 0.f) { + +/* Find the first zero scale factor and return an error code. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (c__[j] == 0.f) { + *info = *m + j; + return 0; + } +/* L110: */ + } + } else { + +/* Invert the scale factors. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ +/* Computing MAX */ + r__2 = c__[j]; + r__1 = f2cmax(r__2,smlnum); + c__[j] = 1.f / f2cmin(r__1,bignum); +/* L120: */ + } + +/* Compute COLCND = f2cmin(C(J)) / f2cmax(C(J)). */ + + *colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } + + return 0; + +/* End of SGEEQUB */ + +} /* sgeequb_ */ + diff --git a/lapack-netlib/SRC/sgees.c b/lapack-netlib/SRC/sgees.c new file mode 100644 index 000000000..2a4c81f5f --- /dev/null +++ b/lapack-netlib/SRC/sgees.c @@ -0,0 +1,1001 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors f +or GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEES + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, */ +/* VS, LDVS, WORK, LWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVS, SORT */ +/* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */ +/* $ WR( * ) */ +/* LOGICAL SELECT */ +/* EXTERNAL SELECT */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEES computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */ +/* > Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). */ +/* > */ +/* > Optionally, it also orders the eigenvalues on the diagonal of the */ +/* > real Schur form so that selected eigenvalues are at the top left. */ +/* > The leading columns of Z then form an orthonormal basis for the */ +/* > invariant subspace corresponding to the selected eigenvalues. */ +/* > */ +/* > A matrix is in real Schur form if it is upper quasi-triangular with */ +/* > 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the */ +/* > form */ +/* > [ a b ] */ +/* > [ c a ] */ +/* > */ +/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVS */ +/* > \verbatim */ +/* > JOBVS is CHARACTER*1 */ +/* > = 'N': Schur vectors are not computed; */ +/* > = 'V': Schur vectors are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELECT). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELECT */ +/* > \verbatim */ +/* > SELECT is a LOGICAL FUNCTION of two REAL arguments */ +/* > SELECT must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'S', SELECT is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > If SORT = 'N', SELECT is not referenced. */ +/* > An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ +/* > SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex */ +/* > conjugate pair of eigenvalues is selected, then both complex */ +/* > eigenvalues are selected. */ +/* > Note that a selected complex eigenvalue may no longer */ +/* > satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned); in this */ +/* > case INFO is set to N+2 (see INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A has been overwritten by its real Schur form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELECT is true. (Complex conjugate */ +/* > pairs for which SELECT is true for either */ +/* > eigenvalue count as 2.) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is REAL array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, */ +/* > respectively, of the computed eigenvalues in the same order */ +/* > that they appear on the diagonal of the output Schur form T. */ +/* > Complex conjugate pairs of eigenvalues will appear */ +/* > consecutively with the eigenvalue having the positive */ +/* > imaginary part first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VS */ +/* > \verbatim */ +/* > VS is REAL array, dimension (LDVS,N) */ +/* > If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ +/* > vectors. */ +/* > If JOBVS = 'N', VS is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVS */ +/* > \verbatim */ +/* > LDVS is INTEGER */ +/* > The leading dimension of the array VS. LDVS >= 1; if */ +/* > JOBVS = 'V', LDVS >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,3*N). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: the QR algorithm failed to compute all the */ +/* > eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ +/* > contain those eigenvalues which have converged; if */ +/* > JOBVS = 'V', VS contains the matrix which reduces A */ +/* > to its partially converged Schur form. */ +/* > = N+1: the eigenvalues could not be reordered because some */ +/* > eigenvalues were too close to separate (the problem */ +/* > is very ill-conditioned); */ +/* > = N+2: after reordering, roundoff changed values of some */ +/* > complex eigenvalues so that leading eigenvalues in */ +/* > the Schur form no longer satisfy SELECT=.TRUE. This */ +/* > could also be caused by underflow due to scaling. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sgees_(char *jobvs, char *sort, L_fp select, integer *n, + real *a, integer *lda, integer *sdim, real *wr, real *wi, real *vs, + integer *ldvs, real *work, integer *lwork, logical *bwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; + + /* Local variables */ + integer ibal; + real anrm; + integer idum[1], ierr, itau, iwrk, inxt, i__; + real s; + integer icond, ieval; + extern logical lsame_(char *, char *); + logical cursl; + integer i1, i2; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), sswap_(integer *, real *, integer *, real *, integer * + ); + logical lst2sl; + extern /* Subroutine */ int slabad_(real *, real *); + logical scalea; + integer ip; + real cscale; + extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, + integer *, integer *, real *, integer *); + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), xerbla_(char + *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *); + logical lastsl; + extern /* Subroutine */ int sorghr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), shseqr_(char + *, char *, integer *, integer *, integer *, real *, integer *, + real *, real *, real *, integer *, real *, integer *, integer *); + integer minwrk, maxwrk; + real smlnum; + integer hswork; + extern /* Subroutine */ int strsen_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, integer *, integer *, integer *, integer * + ); + logical wantst, lquery, wantvs; + integer ihi, ilo; + real dum[1], eps, sep; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vs_dim1 = *ldvs; + vs_offset = 1 + vs_dim1 * 1; + vs -= vs_offset; + --work; + --bwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvs = lsame_(jobvs, "V"); + wantst = lsame_(sort, "S"); + if (! wantvs && ! lsame_(jobvs, "N")) { + *info = -1; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldvs < 1 || wantvs && *ldvs < *n) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by SHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n * 3; + + shseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] + , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); + hswork = work[1]; + + if (! wantvs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEES ", &i__1, (ftnlen)5); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Workspace: need N) */ + + ibal = 1; + sgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 3*N, prefer 2*N+N*NB) */ + + itau = *n + ibal; + iwrk = *n + itau; + i__1 = *lwork - iwrk + 1; + sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvs) { + +/* Copy Householder vectors to VS */ + + slacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) + ; + +/* Generate orthogonal matrix in VS */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], + &i__1, &ierr); + } + + *sdim = 0; + +/* Perform QR iteration, accumulating Schur vectors in VS if desired */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ + vs_offset], ldvs, &work[iwrk], &i__1, &ieval); + if (ieval > 0) { + *info = ieval; + } + +/* Sort eigenvalues if desired */ + + if (wantst && *info == 0) { + if (scalea) { + slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & + ierr); + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*select)(&wr[i__], &wi[i__]); +/* L10: */ + } + +/* Reorder eigenvalues and transform Schur vectors */ +/* (Workspace: none needed) */ + + i__1 = *lwork - iwrk + 1; + strsen_("N", jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], + ldvs, &wr[1], &wi[1], sdim, &s, &sep, &work[iwrk], &i__1, + idum, &c__1, &icond); + if (icond > 0) { + *info = *n + icond; + } + } + + if (wantvs) { + +/* Undo balancing */ +/* (Workspace: need N) */ + + sgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, + &ierr); + } + + if (scalea) { + +/* Undo scaling for the Schur form of A */ + + slascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & + ierr); + i__1 = *lda + 1; + scopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); + if (cscale == smlnum) { + +/* If scaling back towards underflow, adjust WI if an */ +/* offdiagonal element of a 2-by-2 block in the Schur form */ +/* underflows. */ + + if (ieval > 0) { + i1 = ieval + 1; + i2 = ihi - 1; + i__1 = ilo - 1; +/* Computing MAX */ + i__3 = ilo - 1; + i__2 = f2cmax(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ + 1], &i__2, &ierr); + } else if (wantst) { + i1 = 1; + i2 = *n - 1; + } else { + i1 = ilo; + i2 = ihi - 1; + } + inxt = i1 - 1; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + if (i__ < inxt) { + goto L20; + } + if (wi[i__] == 0.f) { + inxt = i__ + 1; + } else { + if (a[i__ + 1 + i__ * a_dim1] == 0.f) { + wi[i__] = 0.f; + wi[i__ + 1] = 0.f; + } else if (a[i__ + 1 + i__ * a_dim1] != 0.f && a[i__ + ( + i__ + 1) * a_dim1] == 0.f) { + wi[i__] = 0.f; + wi[i__ + 1] = 0.f; + if (i__ > 1) { + i__2 = i__ - 1; + sswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( + i__ + 1) * a_dim1 + 1], &c__1); + } + if (*n > i__ + 1) { + i__2 = *n - i__ - 1; + sswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & + a[i__ + 1 + (i__ + 2) * a_dim1], lda); + } + if (wantvs) { + sswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + + 1) * vs_dim1 + 1], &c__1); + } + a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * + a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 0.f; + } + inxt = i__ + 2; + } +L20: + ; + } + } + +/* Undo scaling for the imaginary part of the eigenvalues */ + + i__1 = *n - ieval; +/* Computing MAX */ + i__3 = *n - ieval; + i__2 = f2cmax(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + + 1], &i__2, &ierr); + } + + if (wantst && *info == 0) { + +/* Check if reordering successful */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*select)(&wr[i__], &wi[i__]); + if (wi[i__] == 0.f) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L30: */ + } + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGEES */ + +} /* sgees_ */ + diff --git a/lapack-netlib/SRC/sgeesx.c b/lapack-netlib/SRC/sgeesx.c new file mode 100644 index 000000000..ff8bffda6 --- /dev/null +++ b/lapack-netlib/SRC/sgeesx.c @@ -0,0 +1,1120 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEESX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, */ +/* WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, */ +/* IWORK, LIWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVS, SENSE, SORT */ +/* INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM */ +/* REAL RCONDE, RCONDV */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), */ +/* $ WR( * ) */ +/* LOGICAL SELECT */ +/* EXTERNAL SELECT */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEESX computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues, the real Schur form T, and, optionally, the matrix of */ +/* > Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). */ +/* > */ +/* > Optionally, it also orders the eigenvalues on the diagonal of the */ +/* > real Schur form so that selected eigenvalues are at the top left; */ +/* > computes a reciprocal condition number for the average of the */ +/* > selected eigenvalues (RCONDE); and computes a reciprocal condition */ +/* > number for the right invariant subspace corresponding to the */ +/* > selected eigenvalues (RCONDV). The leading columns of Z form an */ +/* > orthonormal basis for this invariant subspace. */ +/* > */ +/* > For further explanation of the reciprocal condition numbers RCONDE */ +/* > and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where */ +/* > these quantities are called s and sep respectively). */ +/* > */ +/* > A real matrix is in real Schur form if it is upper quasi-triangular */ +/* > with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in */ +/* > the form */ +/* > [ a b ] */ +/* > [ c a ] */ +/* > */ +/* > where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVS */ +/* > \verbatim */ +/* > JOBVS is CHARACTER*1 */ +/* > = 'N': Schur vectors are not computed; */ +/* > = 'V': Schur vectors are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELECT). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELECT */ +/* > \verbatim */ +/* > SELECT is a LOGICAL FUNCTION of two REAL arguments */ +/* > SELECT must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'S', SELECT is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > If SORT = 'N', SELECT is not referenced. */ +/* > An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if */ +/* > SELECT(WR(j),WI(j)) is true; i.e., if either one of a */ +/* > complex conjugate pair of eigenvalues is selected, then both */ +/* > are. Note that a selected complex eigenvalue may no longer */ +/* > satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since */ +/* > ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned); in this */ +/* > case INFO may be set to N+3 (see INFO below). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': None are computed; */ +/* > = 'E': Computed for average of selected eigenvalues only; */ +/* > = 'V': Computed for selected right invariant subspace only; */ +/* > = 'B': Computed for both. */ +/* > If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A is overwritten by its real Schur form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELECT is true. (Complex conjugate */ +/* > pairs for which SELECT is true for either */ +/* > eigenvalue count as 2.) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is REAL array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, respectively, */ +/* > of the computed eigenvalues, in the same order that they */ +/* > appear on the diagonal of the output Schur form T. Complex */ +/* > conjugate pairs of eigenvalues appear consecutively with the */ +/* > eigenvalue having the positive imaginary part first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VS */ +/* > \verbatim */ +/* > VS is REAL array, dimension (LDVS,N) */ +/* > If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur */ +/* > vectors. */ +/* > If JOBVS = 'N', VS is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVS */ +/* > \verbatim */ +/* > LDVS is INTEGER */ +/* > The leading dimension of the array VS. LDVS >= 1, and if */ +/* > JOBVS = 'V', LDVS >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is REAL */ +/* > If SENSE = 'E' or 'B', RCONDE contains the reciprocal */ +/* > condition number for the average of the selected eigenvalues. */ +/* > Not referenced if SENSE = 'N' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is REAL */ +/* > If SENSE = 'V' or 'B', RCONDV contains the reciprocal */ +/* > condition number for the selected right invariant subspace. */ +/* > Not referenced if SENSE = 'N' or 'E'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,3*N). */ +/* > Also, if SENSE = 'E' or 'V' or 'B', */ +/* > LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of */ +/* > selected eigenvalues computed by this routine. Note that */ +/* > N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only */ +/* > returned if LWORK < f2cmax(1,3*N), but if SENSE = 'E' or 'V' or */ +/* > 'B' this may not be large enough. */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates upper bounds on the optimal sizes of the */ +/* > arrays WORK and IWORK, returns these values as the first */ +/* > entries of the WORK and IWORK arrays, and no error messages */ +/* > related to LWORK or LIWORK are issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). */ +/* > Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is */ +/* > only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this */ +/* > may not be large enough. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates upper bounds on the optimal sizes of */ +/* > the arrays WORK and IWORK, returns these values as the first */ +/* > entries of the WORK and IWORK arrays, and no error messages */ +/* > related to LWORK or LIWORK are issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: the QR algorithm failed to compute all the */ +/* > eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI */ +/* > contain those eigenvalues which have converged; if */ +/* > JOBVS = 'V', VS contains the transformation which */ +/* > reduces A to its partially converged Schur form. */ +/* > = N+1: the eigenvalues could not be reordered because some */ +/* > eigenvalues were too close to separate (the problem */ +/* > is very ill-conditioned); */ +/* > = N+2: after reordering, roundoff changed values of some */ +/* > complex eigenvalues so that leading eigenvalues in */ +/* > the Schur form no longer satisfy SELECT=.TRUE. This */ +/* > could also be caused by underflow due to scaling. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sgeesx_(char *jobvs, char *sort, L_fp select, char * + sense, integer *n, real *a, integer *lda, integer *sdim, real *wr, + real *wi, real *vs, integer *ldvs, real *rconde, real *rcondv, real * + work, integer *lwork, integer *iwork, integer *liwork, logical *bwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vs_dim1, vs_offset, i__1, i__2, i__3; + + /* Local variables */ + integer ibal; + real anrm; + integer ierr, itau, iwrk, lwrk, inxt, i__, icond, ieval; + extern logical lsame_(char *, char *); + logical cursl; + integer liwrk, i1, i2; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), sswap_(integer *, real *, integer *, real *, integer * + ); + logical lst2sl; + extern /* Subroutine */ int slabad_(real *, real *); + logical scalea; + integer ip; + real cscale; + extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, + integer *, integer *, real *, integer *); + extern real slamch_(char *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), xerbla_(char + *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern real slange_(char *, integer *, integer *, real *, integer *, real + *); + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *); + logical wantsb, wantse, lastsl; + extern /* Subroutine */ int sorghr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), shseqr_(char + *, char *, integer *, integer *, integer *, real *, integer *, + real *, real *, real *, integer *, real *, integer *, integer *); + integer minwrk, maxwrk; + logical wantsn; + real smlnum; + integer hswork; + extern /* Subroutine */ int strsen_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, real *, real *, integer *, integer *, integer *, integer * + ); + logical wantst, lquery, wantsv, wantvs; + integer ihi, ilo; + real dum[1], eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vs_dim1 = *ldvs; + vs_offset = 1 + vs_dim1 * 1; + vs -= vs_offset; + --work; + --iwork; + --bwork; + + /* Function Body */ + *info = 0; + wantvs = lsame_(jobvs, "V"); + wantst = lsame_(sort, "S"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + lquery = *lwork == -1 || *liwork == -1; + + if (! wantvs && ! lsame_(jobvs, "N")) { + *info = -1; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -2; + } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! + wantsn) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvs < 1 || wantvs && *ldvs < *n) { + *info = -12; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "RWorkspace:" describe the */ +/* minimal amount of real workspace needed at that point in the */ +/* code, as well as the preferred amount for good performance. */ +/* IWorkspace refers to integer workspace. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by SHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case. */ +/* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed */ +/* depends on SDIM, which is computed by the routine STRSEN later */ +/* in the code.) */ + + if (*info == 0) { + liwrk = 1; + if (*n == 0) { + minwrk = 1; + lwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n * 3; + + shseqr_("S", jobvs, n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1] + , &vs[vs_offset], ldvs, &work[1], &c_n1, &ieval); + hswork = work[1]; + + if (! wantvs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + hswork; + maxwrk = f2cmax(i__1,i__2); + } + lwrk = maxwrk; + if (! wantsn) { +/* Computing MAX */ + i__1 = lwrk, i__2 = *n + *n * *n / 2; + lwrk = f2cmax(i__1,i__2); + } + if (wantsv || wantsb) { + liwrk = *n * *n / 4; + } + } + iwork[1] = liwrk; + work[1] = (real) lwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -16; + } else if (*liwork < 1 && ! lquery) { + *info = -18; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEESX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (RWorkspace: need N) */ + + ibal = 1; + sgebal_("P", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (RWorkspace: need 3*N, prefer 2*N+N*NB) */ + + itau = *n + ibal; + iwrk = *n + itau; + i__1 = *lwork - iwrk + 1; + sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvs) { + +/* Copy Householder vectors to VS */ + + slacpy_("L", n, n, &a[a_offset], lda, &vs[vs_offset], ldvs) + ; + +/* Generate orthogonal matrix in VS */ +/* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vs[vs_offset], ldvs, &work[itau], &work[iwrk], + &i__1, &ierr); + } + + *sdim = 0; + +/* Perform QR iteration, accumulating Schur vectors in VS if desired */ +/* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", jobvs, n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vs[ + vs_offset], ldvs, &work[iwrk], &i__1, &ieval); + if (ieval > 0) { + *info = ieval; + } + +/* Sort eigenvalues if desired */ + + if (wantst && *info == 0) { + if (scalea) { + slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wr[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &wi[1], n, & + ierr); + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*select)(&wr[i__], &wi[i__]); +/* L10: */ + } + +/* Reorder eigenvalues, transform Schur vectors, and compute */ +/* reciprocal condition numbers */ +/* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) */ +/* otherwise, need N ) */ +/* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) */ +/* otherwise, need 0 ) */ + + i__1 = *lwork - iwrk + 1; + strsen_(sense, jobvs, &bwork[1], n, &a[a_offset], lda, &vs[vs_offset], + ldvs, &wr[1], &wi[1], sdim, rconde, rcondv, &work[iwrk], & + i__1, &iwork[1], liwork, &icond); + if (! wantsn) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*sdim << 1) * (*n - *sdim); + maxwrk = f2cmax(i__1,i__2); + } + if (icond == -15) { + +/* Not enough real workspace */ + + *info = -16; + } else if (icond == -17) { + +/* Not enough integer workspace */ + + *info = -18; + } else if (icond > 0) { + +/* STRSEN failed to reorder or to restore standard Schur form */ + + *info = icond + *n; + } + } + + if (wantvs) { + +/* Undo balancing */ +/* (RWorkspace: need N) */ + + sgebak_("P", "R", n, &ilo, &ihi, &work[ibal], n, &vs[vs_offset], ldvs, + &ierr); + } + + if (scalea) { + +/* Undo scaling for the Schur form of A */ + + slascl_("H", &c__0, &c__0, &cscale, &anrm, n, n, &a[a_offset], lda, & + ierr); + i__1 = *lda + 1; + scopy_(n, &a[a_offset], &i__1, &wr[1], &c__1); + if ((wantsv || wantsb) && *info == 0) { + dum[0] = *rcondv; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, & + c__1, &ierr); + *rcondv = dum[0]; + } + if (cscale == smlnum) { + +/* If scaling back towards underflow, adjust WI if an */ +/* offdiagonal element of a 2-by-2 block in the Schur form */ +/* underflows. */ + + if (ieval > 0) { + i1 = ieval + 1; + i2 = ihi - 1; + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ + 1], n, &ierr); + } else if (wantst) { + i1 = 1; + i2 = *n - 1; + } else { + i1 = ilo; + i2 = ihi - 1; + } + inxt = i1 - 1; + i__1 = i2; + for (i__ = i1; i__ <= i__1; ++i__) { + if (i__ < inxt) { + goto L20; + } + if (wi[i__] == 0.f) { + inxt = i__ + 1; + } else { + if (a[i__ + 1 + i__ * a_dim1] == 0.f) { + wi[i__] = 0.f; + wi[i__ + 1] = 0.f; + } else if (a[i__ + 1 + i__ * a_dim1] != 0.f && a[i__ + ( + i__ + 1) * a_dim1] == 0.f) { + wi[i__] = 0.f; + wi[i__ + 1] = 0.f; + if (i__ > 1) { + i__2 = i__ - 1; + sswap_(&i__2, &a[i__ * a_dim1 + 1], &c__1, &a[( + i__ + 1) * a_dim1 + 1], &c__1); + } + if (*n > i__ + 1) { + i__2 = *n - i__ - 1; + sswap_(&i__2, &a[i__ + (i__ + 2) * a_dim1], lda, & + a[i__ + 1 + (i__ + 2) * a_dim1], lda); + } + if (wantvs) { + sswap_(n, &vs[i__ * vs_dim1 + 1], &c__1, &vs[(i__ + + 1) * vs_dim1 + 1], &c__1); + } + a[i__ + (i__ + 1) * a_dim1] = a[i__ + 1 + i__ * + a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 0.f; + } + inxt = i__ + 2; + } +L20: + ; + } + } + i__1 = *n - ieval; +/* Computing MAX */ + i__3 = *n - ieval; + i__2 = f2cmax(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[ieval + + 1], &i__2, &ierr); + } + + if (wantst && *info == 0) { + +/* Check if reordering successful */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*select)(&wr[i__], &wi[i__]); + if (wi[i__] == 0.f) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L30: */ + } + } + + work[1] = (real) maxwrk; + if (wantsv || wantsb) { + iwork[1] = *sdim * (*n - *sdim); + } else { + iwork[1] = 1; + } + + return 0; + +/* End of SGEESX */ + +} /* sgeesx_ */ + diff --git a/lapack-netlib/SRC/sgeev.c b/lapack-netlib/SRC/sgeev.c new file mode 100644 index 000000000..626b1d577 --- /dev/null +++ b/lapack-netlib/SRC/sgeev.c @@ -0,0 +1,1029 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, */ +/* LDVR, WORK, LWORK, INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDVL, LDVR, LWORK, N */ +/* REAL A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WI( * ), WORK( * ), WR( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEEV computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues and, optionally, the left and/or right eigenvectors. */ +/* > */ +/* > The right eigenvector v(j) of A satisfies */ +/* > A * v(j) = lambda(j) * v(j) */ +/* > where lambda(j) is its eigenvalue. */ +/* > The left eigenvector u(j) of A satisfies */ +/* > u(j)**H * A = lambda(j) * u(j)**H */ +/* > where u(j)**H denotes the conjugate-transpose of u(j). */ +/* > */ +/* > The computed eigenvectors are normalized to have Euclidean norm */ +/* > equal to 1 and largest component real. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': left eigenvectors of A are not computed; */ +/* > = 'V': left eigenvectors of A are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': right eigenvectors of A are not computed; */ +/* > = 'V': right eigenvectors of A are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is REAL array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, */ +/* > respectively, of the computed eigenvalues. Complex */ +/* > conjugate pairs of eigenvalues appear consecutively */ +/* > with the eigenvalue having the positive imaginary part */ +/* > first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is REAL array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* > after another in the columns of VL, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVL = 'N', VL is not referenced. */ +/* > If the j-th eigenvalue is real, then u(j) = VL(:,j), */ +/* > the j-th column of VL. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ +/* > u(j+1) = VL(:,j) - i*VL(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the array VL. LDVL >= 1; if */ +/* > JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is REAL array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* > after another in the columns of VR, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVR = 'N', VR is not referenced. */ +/* > If the j-th eigenvalue is real, then v(j) = VR(:,j), */ +/* > the j-th column of VR. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ +/* > v(j+1) = VR(:,j) - i*VR(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the array VR. LDVR >= 1; if */ +/* > JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,3*N), and */ +/* > if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good */ +/* > performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the QR algorithm failed to compute all the */ +/* > eigenvalues, and no eigenvectors have been computed; */ +/* > elements i+1:N of WR and WI contain eigenvalues which */ +/* > have converged. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* @generated from dgeev.f, fortran d -> s, Tue Apr 19 01:47:44 2016 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + real r__1, r__2; + + /* Local variables */ + integer ibal; + char side[1]; + real anrm; + integer ierr, itau, iwrk, nout; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + extern real snrm2_(integer *, real *, integer *); + integer i__, k; + real r__; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + extern real slapy2_(real *, real *); + real cs; + extern /* Subroutine */ int slabad_(real *, real *); + logical scalea; + real cscale; + extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, + integer *, integer *, real *, integer *); + real sn; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), xerbla_(char + *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical select[1]; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slartg_(real *, real *, + real *, real *, real *), sorghr_(integer *, integer *, integer *, + real *, integer *, real *, real *, integer *, integer *), shseqr_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *); + integer minwrk, maxwrk; + logical wantvl; + real smlnum; + integer hswork; + logical lquery, wantvr; + extern /* Subroutine */ int strevc3_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, integer *, integer + *, integer *, real *, integer *, integer *); + integer ihi; + real scl; + integer ilo; + real dum[1], eps; + integer lwork_trevc__; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by SHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = * + n + hswork; + maxwrk = f2cmax(i__1,i__2); + strevc3_("L", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = f2cmax(i__1,i__2); + } else if (wantvr) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = f2cmax(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = * + n + hswork; + maxwrk = f2cmax(i__1,i__2); + strevc3_("R", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = f2cmax(i__1,i__2); + } else { + minwrk = *n * 3; + shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = * + n + hswork; + maxwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,minwrk); + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEEV ", &i__1, (ftnlen)5); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Balance the matrix */ +/* (Workspace: need N) */ + + ibal = 1; + sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 3*N, prefer 2*N+N*NB) */ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* Want left eigenvectors */ +/* Copy Householder vectors to VL */ + + *(unsigned char *)side = 'L'; + slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* Generate orthogonal matrix in VL */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VL */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* Want left and right eigenvectors */ +/* Copy Schur vectors to VR */ + + *(unsigned char *)side = 'B'; + slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* Want right eigenvectors */ +/* Copy Householder vectors to VR */ + + *(unsigned char *)side = 'R'; + slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* Generate orthogonal matrix in VR */ +/* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VR */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* Compute eigenvalues only */ +/* (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO .NE. 0 from SHSEQR, then quit */ + + if (*info != 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* Compute left and/or right eigenvectors */ +/* (Workspace: need 4*N, prefer N + N + 2*N*NB) */ + + i__1 = *lwork - iwrk + 1; + strevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, & + ierr); + } + + if (wantvl) { + +/* Undo balancing of left eigenvectors */ +/* (Workspace: need N) */ + + sgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + r__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L10: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.f; + } +/* L20: */ + } + } + + if (wantvr) { + +/* Undo balancing of right eigenvectors */ +/* (Workspace: need N) */ + + sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + r__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L30: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.f; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGEEV */ + +} /* sgeev_ */ + diff --git a/lapack-netlib/SRC/sgeevx.c b/lapack-netlib/SRC/sgeevx.c new file mode 100644 index 000000000..89648a794 --- /dev/null +++ b/lapack-netlib/SRC/sgeevx.c @@ -0,0 +1,1197 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, */ +/* VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, */ +/* RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) */ + +/* CHARACTER BALANC, JOBVL, JOBVR, SENSE */ +/* INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N */ +/* REAL ABNRM */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), */ +/* $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WI( * ), WORK( * ), WR( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEEVX computes for an N-by-N real nonsymmetric matrix A, the */ +/* > eigenvalues and, optionally, the left and/or right eigenvectors. */ +/* > */ +/* > Optionally also, it computes a balancing transformation to improve */ +/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ +/* > SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */ +/* > (RCONDE), and reciprocal condition numbers for the right */ +/* > eigenvectors (RCONDV). */ +/* > */ +/* > The right eigenvector v(j) of A satisfies */ +/* > A * v(j) = lambda(j) * v(j) */ +/* > where lambda(j) is its eigenvalue. */ +/* > The left eigenvector u(j) of A satisfies */ +/* > u(j)**H * A = lambda(j) * u(j)**H */ +/* > where u(j)**H denotes the conjugate-transpose of u(j). */ +/* > */ +/* > The computed eigenvectors are normalized to have Euclidean norm */ +/* > equal to 1 and largest component real. */ +/* > */ +/* > Balancing a matrix means permuting the rows and columns to make it */ +/* > more nearly upper triangular, and applying a diagonal similarity */ +/* > transformation D * A * D**(-1), where D is a diagonal matrix, to */ +/* > make its rows and columns closer in norm and the condition numbers */ +/* > of its eigenvalues and eigenvectors smaller. The computed */ +/* > reciprocal condition numbers correspond to the balanced matrix. */ +/* > Permuting rows and columns will not change the condition numbers */ +/* > (in exact arithmetic) but diagonal scaling will. For further */ +/* > explanation of balancing, see section 4.10.2 of the LAPACK */ +/* > Users' Guide. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] BALANC */ +/* > \verbatim */ +/* > BALANC is CHARACTER*1 */ +/* > Indicates how the input matrix should be diagonally scaled */ +/* > and/or permuted to improve the conditioning of its */ +/* > eigenvalues. */ +/* > = 'N': Do not diagonally scale or permute; */ +/* > = 'P': Perform permutations to make the matrix more nearly */ +/* > upper triangular. Do not diagonally scale; */ +/* > = 'S': Diagonally scale the matrix, i.e. replace A by */ +/* > D*A*D**(-1), where D is a diagonal matrix chosen */ +/* > to make the rows and columns of A more equal in */ +/* > norm. Do not permute; */ +/* > = 'B': Both diagonally scale and permute A. */ +/* > */ +/* > Computed reciprocal condition numbers will be for the matrix */ +/* > after balancing and/or permuting. Permuting does not change */ +/* > condition numbers (in exact arithmetic), but balancing does. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': left eigenvectors of A are not computed; */ +/* > = 'V': left eigenvectors of A are computed. */ +/* > If SENSE = 'E' or 'B', JOBVL must = 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': right eigenvectors of A are not computed; */ +/* > = 'V': right eigenvectors of A are computed. */ +/* > If SENSE = 'E' or 'B', JOBVR must = 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': None are computed; */ +/* > = 'E': Computed for eigenvalues only; */ +/* > = 'V': Computed for right eigenvectors only; */ +/* > = 'B': Computed for eigenvalues and right eigenvectors. */ +/* > */ +/* > If SENSE = 'E' or 'B', both left and right eigenvectors */ +/* > must also be computed (JOBVL = 'V' and JOBVR = 'V'). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > On exit, A has been overwritten. If JOBVL = 'V' or */ +/* > JOBVR = 'V', A contains the real Schur form of the balanced */ +/* > version of the input matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is REAL array, dimension (N) */ +/* > WR and WI contain the real and imaginary parts, */ +/* > respectively, of the computed eigenvalues. Complex */ +/* > conjugate pairs of eigenvalues will appear consecutively */ +/* > with the eigenvalue having the positive imaginary part */ +/* > first. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is REAL array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* > after another in the columns of VL, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVL = 'N', VL is not referenced. */ +/* > If the j-th eigenvalue is real, then u(j) = VL(:,j), */ +/* > the j-th column of VL. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ +/* > u(j+1) = VL(:,j) - i*VL(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the array VL. LDVL >= 1; if */ +/* > JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is REAL array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* > after another in the columns of VR, in the same order */ +/* > as their eigenvalues. */ +/* > If JOBVR = 'N', VR is not referenced. */ +/* > If the j-th eigenvalue is real, then v(j) = VR(:,j), */ +/* > the j-th column of VR. */ +/* > If the j-th and (j+1)-st eigenvalues form a complex */ +/* > conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ +/* > v(j+1) = VR(:,j) - i*VR(:,j+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the array VR. LDVR >= 1, and if */ +/* > JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are integer values determined when A was */ +/* > balanced. The balanced A(i,j) = 0 if I > J and */ +/* > J = 1,...,ILO-1 or I = IHI+1,...,N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > when balancing A. If P(j) is the index of the row and column */ +/* > interchanged with row and column j, and D(j) is the scaling */ +/* > factor applied to row and column j, then */ +/* > SCALE(J) = P(J), for J = 1,...,ILO-1 */ +/* > = D(J), for J = ILO,...,IHI */ +/* > = P(J) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ABNRM */ +/* > \verbatim */ +/* > ABNRM is REAL */ +/* > The one-norm of the balanced matrix (the maximum */ +/* > of the sum of absolute values of elements of any column). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is REAL array, dimension (N) */ +/* > RCONDE(j) is the reciprocal condition number of the j-th */ +/* > eigenvalue. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is REAL array, dimension (N) */ +/* > RCONDV(j) is the reciprocal condition number of the j-th */ +/* > right eigenvector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. If SENSE = 'N' or 'E', */ +/* > LWORK >= f2cmax(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */ +/* > LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (2*N-2) */ +/* > If SENSE = 'N' or 'E', not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = i, the QR algorithm failed to compute all the */ +/* > eigenvalues, and no eigenvectors or condition numbers */ +/* > have been computed; elements 1:ILO-1 and i+1:N of WR */ +/* > and WI contain eigenvalues which have converged. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* @generated from dgeevx.f, fortran d -> s, Tue Apr 19 01:47:44 2016 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sgeevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, real *a, integer *lda, real *wr, real *wi, real * + vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer * + ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work, + integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + real r__1, r__2; + + /* Local variables */ + char side[1]; + real anrm; + integer ierr, itau, iwrk, nout; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + extern real snrm2_(integer *, real *, integer *); + integer i__, k; + real r__; + integer icond; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + extern real slapy2_(real *, real *); + real cs; + extern /* Subroutine */ int slabad_(real *, real *); + logical scalea; + real cscale; + extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, + integer *, integer *, real *, integer *); + real sn; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), xerbla_(char + *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical select[1]; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slartg_(real *, real *, + real *, real *, real *), sorghr_(integer *, integer *, integer *, + real *, integer *, real *, real *, integer *, integer *), shseqr_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *); + integer minwrk, maxwrk; + extern /* Subroutine */ int strsna_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *, + integer *); + logical wantvl, wntsnb; + integer hswork; + logical wntsne; + real smlnum; + logical lquery, wantvr, wntsnn, wntsnv; + extern /* Subroutine */ int strevc3_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, integer *, integer + *, integer *, real *, integer *, integer *); + char job[1]; + real scl, dum[1], eps; + integer lwork_trevc__; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --scale; + --rconde; + --rcondv; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + wntsnn = lsame_(sense, "N"); + wntsne = lsame_(sense, "E"); + wntsnv = lsame_(sense, "V"); + wntsnb = lsame_(sense, "B"); + if (! (lsame_(balanc, "N") || lsame_(balanc, "S") || lsame_(balanc, "P") + || lsame_(balanc, "B"))) { + *info = -1; + } else if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -2; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -3; + } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) + && ! (wantvl && wantvr)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -13; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ +/* HSWORK refers to the workspace preferred by SHSEQR, as */ +/* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ +/* the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = *n + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + + if (wantvl) { + strevc3_("L", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + } else if (wantvr) { + strevc3_("R", "B", select, n, &a[a_offset], lda, &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, & + work[1], &c_n1, &ierr); + lwork_trevc__ = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = f2cmax(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + } else { + if (wntsnn) { + shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], + &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, + info); + } else { + shseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], + &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, + info); + } + } + hswork = (integer) work[1]; + + if (! wantvl && ! wantvr) { + minwrk = *n << 1; + if (! wntsnn) { +/* Computing MAX */ + i__1 = minwrk, i__2 = *n * *n + *n * 6; + minwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,hswork); + if (! wntsnn) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *n + *n * 6; + maxwrk = f2cmax(i__1,i__2); + } + } else { + minwrk = *n * 3; + if (! wntsnn && ! wntsne) { +/* Computing MAX */ + i__1 = minwrk, i__2 = *n * *n + *n * 6; + minwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,hswork); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "SORGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + if (! wntsnn && ! wntsne) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *n + *n * 6; + maxwrk = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3; + maxwrk = f2cmax(i__1,i__2); + } + maxwrk = f2cmax(maxwrk,minwrk); + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -21; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEEVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + icond = 0; + anrm = slange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* Balance the matrix and compute ABNRM */ + + sgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr); + *abnrm = slange_("1", n, n, &a[a_offset], lda, dum); + if (scalea) { + dum[0] = *abnrm; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, & + ierr); + *abnrm = dum[0]; + } + +/* Reduce to upper Hessenberg form */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + sgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, & + ierr); + + if (wantvl) { + +/* Want left eigenvectors */ +/* Copy Householder vectors to VL */ + + *(unsigned char *)side = 'L'; + slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* Generate orthogonal matrix in VL */ +/* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], & + i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VL */ +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* Want left and right eigenvectors */ +/* Copy Schur vectors to VR */ + + *(unsigned char *)side = 'B'; + slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* Want right eigenvectors */ +/* Copy Householder vectors to VR */ + + *(unsigned char *)side = 'R'; + slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* Generate orthogonal matrix in VR */ +/* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], & + i__1, &ierr); + +/* Perform QR iteration, accumulating Schur vectors in VR */ +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* Compute eigenvalues only */ +/* If condition numbers desired, compute Schur form */ + + if (wntsnn) { + *(unsigned char *)job = 'E'; + } else { + *(unsigned char *)job = 'S'; + } + +/* (Workspace: need 1, prefer HSWORK (see comments) ) */ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO .NE. 0 from SHSEQR, then quit */ + + if (*info != 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* Compute left and/or right eigenvectors */ +/* (Workspace: need 3*N, prefer N + 2*N*NB) */ + + i__1 = *lwork - iwrk + 1; + strevc3_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &i__1, & + ierr); + } + +/* Compute condition numbers if desired */ +/* (Workspace: need N*N+6*N unless SENSE = 'E') */ + + if (! wntsnn) { + strsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, + &work[iwrk], n, &iwork[1], &icond); + } + + if (wantvl) { + +/* Undo balancing of left eigenvectors */ + + sgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + r__2 = vl[k + (i__ + 1) * vl_dim1]; + work[k] = r__1 * r__1 + r__2 * r__2; +/* L10: */ + } + k = isamax_(n, &work[1], &c__1); + slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.f; + } +/* L20: */ + } + } + + if (wantvr) { + +/* Undo balancing of right eigenvectors */ + + sgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + r__2 = vr[k + (i__ + 1) * vr_dim1]; + work[k] = r__1 * r__1 + r__2 * r__2; +/* L30: */ + } + k = isamax_(n, &work[1], &c__1); + slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.f; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = f2cmax(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info == 0) { + if ((wntsnv || wntsnb) && icond == 0) { + slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[ + 1], n, &ierr); + } + } else { + i__1 = *ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = *ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGEEVX */ + +} /* sgeevx_ */ + diff --git a/lapack-netlib/SRC/sgehd2.c b/lapack-netlib/SRC/sgehd2.c new file mode 100644 index 000000000..f5e8e9e51 --- /dev/null +++ b/lapack-netlib/SRC/sgehd2.c @@ -0,0 +1,627 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. +*/ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEHD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER IHI, ILO, INFO, LDA, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEHD2 reduces a real general matrix A to upper Hessenberg form H by */ +/* > an orthogonal similarity transformation: Q**T * A * Q = H . */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > It is assumed that A is already upper triangular in rows */ +/* > and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* > set by a previous call to SGEBAL; otherwise they should be */ +/* > set to 1 and N respectively. See Further Details. */ +/* > 1 <= ILO <= IHI <= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the n by n general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > elements below the first subdiagonal, with the array TAU, */ +/* > represent the orthogonal matrix Q as a product of elementary */ +/* > reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of (ihi-ilo) elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ +/* > exit in A(i+2:ihi,i), and tau in TAU(i). */ +/* > */ +/* > The contents of A are illustrated by the following example, with */ +/* > n = 7, ilo = 2 and ihi = 6: */ +/* > */ +/* > on entry, on exit, */ +/* > */ +/* > ( a a a a a a a ) ( a a h h h h a ) */ +/* > ( a a a a a a ) ( a h h h h a ) */ +/* > ( a a a a a a ) ( h h h h h h ) */ +/* > ( a a a a a a ) ( v2 h h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 v4 h h h ) */ +/* > ( a ) ( a ) */ +/* > */ +/* > where a denotes an element of the original matrix A, h denotes a */ +/* > modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* > element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *, ftnlen), slarfg_(integer *, real *, real *, + integer *, real *); + real aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) { + *info = -2; + } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHD2", &i__1, (ftnlen)6); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[f2cmin(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of SGEHD2 */ + +} /* sgehd2_ */ + diff --git a/lapack-netlib/SRC/sgehrd.c b/lapack-netlib/SRC/sgehrd.c new file mode 100644 index 000000000..1237fe353 --- /dev/null +++ b/lapack-netlib/SRC/sgehrd.c @@ -0,0 +1,782 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEHRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEHRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER IHI, ILO, INFO, LDA, LWORK, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEHRD reduces a real general matrix A to upper Hessenberg form H by */ +/* > an orthogonal similarity transformation: Q**T * A * Q = H . */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > It is assumed that A is already upper triangular in rows */ +/* > and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* > set by a previous call to SGEBAL; otherwise they should be */ +/* > set to 1 and N respectively. See Further Details. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > elements below the first subdiagonal, with the array TAU, */ +/* > represent the orthogonal matrix Q as a product of elementary */ +/* > reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */ +/* > zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of (ihi-ilo) elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(ilo) H(ilo+1) . . . H(ihi-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */ +/* > exit in A(i+2:ihi,i), and tau in TAU(i). */ +/* > */ +/* > The contents of A are illustrated by the following example, with */ +/* > n = 7, ilo = 2 and ihi = 6: */ +/* > */ +/* > on entry, on exit, */ +/* > */ +/* > ( a a a a a a a ) ( a a h h h h a ) */ +/* > ( a a a a a a ) ( a h h h h a ) */ +/* > ( a a a a a a ) ( h h h h h h ) */ +/* > ( a a a a a a ) ( v2 h h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 h h h h ) */ +/* > ( a a a a a a ) ( v2 v3 v4 h h h ) */ +/* > ( a ) ( a ) */ +/* > */ +/* > where a denotes an element of the original matrix A, h denotes a */ +/* > modified element of the upper Hessenberg matrix H, and vi denotes an */ +/* > element of the vector defining H(i). */ +/* > */ +/* > This file is a slight modification of LAPACK-3.0's DGEHRD */ +/* > subroutine incorporating improvements proposed by Quintana-Orti and */ +/* > Van de Geijn (2006). (See DLAHR2.) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), strmm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), saxpy_(integer *, + real *, real *, integer *, real *, integer *), sgehd2_(integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + ), slahr2_(integer *, integer *, integer *, real *, integer *, + real *, real *, integer *, real *, integer *); + integer ib; + real ei; + integer nb, nh, nx; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *,ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + integer iwt; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) { + *info = -2; + } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -8; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = f2cmin(i__1,i__2); + lwkopt = *n * nb + 4160; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = f2cmax(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.f; + return 0; + } + +/* Determine the block size */ + +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = f2cmin(i__1,i__2); + nbmin = 2; + if (nb > 1 && nb < nh) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code) */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code */ + + if (*lwork < *n * nb + 4160) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + if (*lwork >= *n * nbmin + 4160) { + nb = (*lwork - 4160) / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + iwt = *n * nb + 1; + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = f2cmin(i__3,i__4); + +/* Reduce columns i:i+ib-1 to Hessenberg form, returning the */ +/* matrices V and T of the block reflector H = I - V*T*V**T */ +/* which performs the reduction, and also the matrix Y = A*V*T */ + + slahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], & + work[iwt], &c__65, &work[1], &ldwork); + +/* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */ +/* right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set */ +/* to 1 */ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f; + i__3 = *ihi - i__ - ib + 1; + sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b26, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */ +/* right */ + + i__3 = ib - 1; + strmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + saxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + + j + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + +/* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */ +/* left */ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], & + c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & + ldwork); +/* L40: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (real) lwkopt; + + return 0; + +/* End of SGEHRD */ + +} /* sgehrd_ */ + diff --git a/lapack-netlib/SRC/sgejsv.c b/lapack-netlib/SRC/sgejsv.c new file mode 100644 index 000000000..416fdb4d5 --- /dev/null +++ b/lapack-netlib/SRC/sgejsv.c @@ -0,0 +1,2678 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) ceil(w) +#define myhuge_(w) HUGE_VAL +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEJSV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEJSV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, */ +/* M, N, A, LDA, SVA, U, LDU, V, LDV, */ +/* WORK, LWORK, IWORK, INFO ) */ + +/* IMPLICIT NONE */ +/* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N */ +/* REAL A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), */ +/* $ WORK( LWORK ) */ +/* INTEGER IWORK( * ) */ +/* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEJSV computes the singular value decomposition (SVD) of a real M-by-N */ +/* > matrix [A], where M >= N. The SVD of [A] is written as */ +/* > */ +/* > [A] = [U] * [SIGMA] * [V]^t, */ +/* > */ +/* > where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N */ +/* > diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and */ +/* > [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are */ +/* > the singular values of [A]. The columns of [U] and [V] are the left and */ +/* > the right singular vectors of [A], respectively. The matrices [U] and [V] */ +/* > are computed and stored in the arrays U and V, respectively. The diagonal */ +/* > of [SIGMA] is computed and stored in the array SVA. */ +/* > SGEJSV can sometimes compute tiny singular values and their singular vectors much */ +/* > more accurately than other SVD routines, see below under Further Details. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the level of accuracy: */ +/* > = 'C': This option works well (high relative accuracy) if A = B * D, */ +/* > with well-conditioned B and arbitrary diagonal matrix D. */ +/* > The accuracy cannot be spoiled by COLUMN scaling. The */ +/* > accuracy of the computed output depends on the condition of */ +/* > B, and the procedure aims at the best theoretical accuracy. */ +/* > The relative error max_{i=1:N}|d sigma_i| / sigma_i is */ +/* > bounded by f(M,N)*epsilon* cond(B), independent of D. */ +/* > The input matrix is preprocessed with the QRF with column */ +/* > pivoting. This initial preprocessing and preconditioning by */ +/* > a rank revealing QR factorization is common for all values of */ +/* > JOBA. Additional actions are specified as follows: */ +/* > = 'E': Computation as with 'C' with an additional estimate of the */ +/* > condition number of B. It provides a realistic error bound. */ +/* > = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings */ +/* > D1, D2, and well-conditioned matrix C, this option gives */ +/* > higher accuracy than the 'C' option. If the structure of the */ +/* > input matrix is not known, and relative accuracy is */ +/* > desirable, then this option is advisable. The input matrix A */ +/* > is preprocessed with QR factorization with FULL (row and */ +/* > column) pivoting. */ +/* > = 'G': Computation as with 'F' with an additional estimate of the */ +/* > condition number of B, where A=D*B. If A has heavily weighted */ +/* > rows, then using this condition number gives too pessimistic */ +/* > error bound. */ +/* > = 'A': Small singular values are the noise and the matrix is treated */ +/* > as numerically rank deficient. The error in the computed */ +/* > singular values is bounded by f(m,n)*epsilon*||A||. */ +/* > The computed SVD A = U * S * V^t restores A up to */ +/* > f(m,n)*epsilon*||A||. */ +/* > This gives the procedure the licence to discard (set to zero) */ +/* > all singular values below N*epsilon*||A||. */ +/* > = 'R': Similar as in 'A'. Rank revealing property of the initial */ +/* > QR factorization is used do reveal (using triangular factor) */ +/* > a gap sigma_{r+1} < epsilon * sigma_r in which case the */ +/* > numerical RANK is declared to be r. The SVD is computed with */ +/* > absolute error bounds, but more accurately than with 'A'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies whether to compute the columns of U: */ +/* > = 'U': N columns of U are returned in the array U. */ +/* > = 'F': full set of M left sing. vectors is returned in the array U. */ +/* > = 'W': U may be used as workspace of length M*N. See the description */ +/* > of U. */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether to compute the matrix V: */ +/* > = 'V': N columns of V are returned in the array V; Jacobi rotations */ +/* > are not explicitly accumulated. */ +/* > = 'J': N columns of V are returned in the array V, but they are */ +/* > computed as the product of Jacobi rotations. This option is */ +/* > allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. */ +/* > = 'W': V may be used as workspace of length N*N. See the description */ +/* > of V. */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBR */ +/* > \verbatim */ +/* > JOBR is CHARACTER*1 */ +/* > Specifies the RANGE for the singular values. Issues the licence to */ +/* > set to zero small positive singular values if they are outside */ +/* > specified range. If A .NE. 0 is scaled so that the largest singular */ +/* > value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues */ +/* > the licence to kill columns of A whose norm in c*A is less than */ +/* > SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN, */ +/* > where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). */ +/* > = 'N': Do not kill small columns of c*A. This option assumes that */ +/* > BLAS and QR factorizations and triangular solvers are */ +/* > implemented to work in that range. If the condition of A */ +/* > is greater than BIG, use SGESVJ. */ +/* > = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] */ +/* > (roughly, as described above). This option is recommended. */ +/* > =========================== */ +/* > For computing the singular values in the FULL range [SFMIN,BIG] */ +/* > use SGESVJ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBT */ +/* > \verbatim */ +/* > JOBT is CHARACTER*1 */ +/* > If the matrix is square then the procedure may determine to use */ +/* > transposed A if A^t seems to be better with respect to convergence. */ +/* > If the matrix is not square, JOBT is ignored. This is subject to */ +/* > changes in the future. */ +/* > The decision is based on two values of entropy over the adjoint */ +/* > orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). */ +/* > = 'T': transpose if entropy test indicates possibly faster */ +/* > convergence of Jacobi process if A^t is taken as input. If A is */ +/* > replaced with A^t, then the row pivoting is included automatically. */ +/* > = 'N': do not speculate. */ +/* > This option can be used to compute only the singular values, or the */ +/* > full SVD (U, SIGMA and V). For only one set of singular vectors */ +/* > (U or V), the caller should provide both U and V, as one of the */ +/* > matrices is used as workspace if the matrix A is transposed. */ +/* > The implementer can easily remove this constraint and make the */ +/* > code more complicated. See the descriptions of U and V. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBP */ +/* > \verbatim */ +/* > JOBP is CHARACTER*1 */ +/* > Issues the licence to introduce structured perturbations to drown */ +/* > denormalized numbers. This licence should be active if the */ +/* > denormals are poorly implemented, causing slow computation, */ +/* > especially in cases of fast convergence (!). For details see [1,2]. */ +/* > For the sake of simplicity, this perturbations are included only */ +/* > when the full SVD or only the singular values are requested. The */ +/* > implementer/user can easily add the perturbation for the cases of */ +/* > computing one set of singular vectors. */ +/* > = 'P': introduce perturbation */ +/* > = 'N': do not perturb */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SVA */ +/* > \verbatim */ +/* > SVA is REAL array, dimension (N) */ +/* > On exit, */ +/* > - For WORK(1)/WORK(2) = ONE: The singular values of A. During the */ +/* > computation SVA contains Euclidean column norms of the */ +/* > iterated matrices in the array A. */ +/* > - For WORK(1) .NE. WORK(2): The singular values of A are */ +/* > (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if */ +/* > sigma_max(A) overflows or if small singular values have been */ +/* > saved from underflow by scaling the input matrix A. */ +/* > - If JOBR='R' then some of the singular values may be returned */ +/* > as exact zeros obtained by "set to zero" because they are */ +/* > below the numerical rank threshold or are denormalized numbers. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is REAL array, dimension ( LDU, N ) */ +/* > If JOBU = 'U', then U contains on exit the M-by-N matrix of */ +/* > the left singular vectors. */ +/* > If JOBU = 'F', then U contains on exit the M-by-M matrix of */ +/* > the left singular vectors, including an ONB */ +/* > of the orthogonal complement of the Range(A). */ +/* > If JOBU = 'W' .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N), */ +/* > then U is used as workspace if the procedure */ +/* > replaces A with A^t. In that case, [V] is computed */ +/* > in U as left singular vectors of A^t and then */ +/* > copied back to the V array. This 'W' option is just */ +/* > a reminder to the caller that in this case U is */ +/* > reserved as workspace of length N*N. */ +/* > If JOBU = 'N' U is not referenced, unless JOBT='T'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U, LDU >= 1. */ +/* > IF JOBU = 'U' or 'F' or 'W', then LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension ( LDV, N ) */ +/* > If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of */ +/* > the right singular vectors; */ +/* > If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N), */ +/* > then V is used as workspace if the pprocedure */ +/* > replaces A with A^t. In that case, [U] is computed */ +/* > in V as right singular vectors of A^t and then */ +/* > copied back to the U array. This 'W' option is just */ +/* > a reminder to the caller that in this case V is */ +/* > reserved as workspace of length N*N. */ +/* > If JOBV = 'N' V is not referenced, unless JOBT='T'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V' or 'J' or 'W', then LDV >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > On exit, */ +/* > WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such */ +/* > that SCALE*SVA(1:N) are the computed singular values */ +/* > of A. (See the description of SVA().) */ +/* > WORK(2) = See the description of WORK(1). */ +/* > WORK(3) = SCONDA is an estimate for the condition number of */ +/* > column equilibrated A. (If JOBA = 'E' or 'G') */ +/* > SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1). */ +/* > It is computed using SPOCON. It holds */ +/* > N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ +/* > where R is the triangular factor from the QRF of A. */ +/* > However, if R is truncated and the numerical rank is */ +/* > determined to be strictly smaller than N, SCONDA is */ +/* > returned as -1, thus indicating that the smallest */ +/* > singular values might be lost. */ +/* > */ +/* > If full SVD is needed, the following two condition numbers are */ +/* > useful for the analysis of the algorithm. They are provied for */ +/* > a developer/implementer who is familiar with the details of */ +/* > the method. */ +/* > */ +/* > WORK(4) = an estimate of the scaled condition number of the */ +/* > triangular factor in the first QR factorization. */ +/* > WORK(5) = an estimate of the scaled condition number of the */ +/* > triangular factor in the second QR factorization. */ +/* > The following two parameters are computed if JOBT = 'T'. */ +/* > They are provided for a developer/implementer who is familiar */ +/* > with the details of the method. */ +/* > */ +/* > WORK(6) = the entropy of A^t*A :: this is the Shannon entropy */ +/* > of diag(A^t*A) / Trace(A^t*A) taken as point in the */ +/* > probability simplex. */ +/* > WORK(7) = the entropy of A*A^t. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > Length of WORK to confirm proper allocation of work space. */ +/* > LWORK depends on the job: */ +/* > */ +/* > If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and */ +/* > -> .. no scaled condition estimate required (JOBE = 'N'): */ +/* > LWORK >= f2cmax(2*M+N,4*N+1,7). This is the minimal requirement. */ +/* > ->> For optimal performance (blocked code) the optimal value */ +/* > is LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal */ +/* > block size for DGEQP3 and DGEQRF. */ +/* > In general, optimal LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). */ +/* > -> .. an estimate of the scaled condition number of A is */ +/* > required (JOBA='E', 'G'). In this case, LWORK is the maximum */ +/* > of the above and N*N+4*N, i.e. LWORK >= f2cmax(2*M+N,N*N+4*N,7). */ +/* > ->> For optimal performance (blocked code) the optimal value */ +/* > is LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). */ +/* > In general, the optimal length LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), */ +/* > N+N*N+LWORK(DPOCON),7). */ +/* > */ +/* > If SIGMA and the right singular vectors are needed (JOBV = 'V'), */ +/* > -> the minimal requirement is LWORK >= f2cmax(2*M+N,4*N+1,7). */ +/* > -> For optimal performance, LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7), */ +/* > where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ, */ +/* > DORMLQ. In general, the optimal length LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), */ +/* > N+LWORK(DGELQ), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). */ +/* > */ +/* > If SIGMA and the left singular vectors are needed */ +/* > -> the minimal requirement is LWORK >= f2cmax(2*M+N,4*N+1,7). */ +/* > -> For optimal performance: */ +/* > if JOBU = 'U' :: LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,7), */ +/* > if JOBU = 'F' :: LWORK >= f2cmax(2*M+N,3*N+(N+1)*NB,N+M*NB,7), */ +/* > where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. */ +/* > In general, the optimal length LWORK is computed as */ +/* > LWORK >= f2cmax(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), */ +/* > 2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). */ +/* > Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or */ +/* > M*NB (for JOBU = 'F'). */ +/* > */ +/* > If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and */ +/* > -> if JOBV = 'V' */ +/* > the minimal requirement is LWORK >= f2cmax(2*M+N,6*N+2*N*N). */ +/* > -> if JOBV = 'J' the minimal requirement is */ +/* > LWORK >= f2cmax(2*M+N, 4*N+N*N,2*N+N*N+6). */ +/* > -> For optimal performance, LWORK should be additionally */ +/* > larger than N+M*NB, where NB is the optimal block size */ +/* > for DORMQR. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (M+3*N). */ +/* > On exit, */ +/* > IWORK(1) = the numerical rank determined after the initial */ +/* > QR factorization with pivoting. See the descriptions */ +/* > of JOBA and JOBR. */ +/* > IWORK(2) = the number of the computed nonzero singular values */ +/* > IWORK(3) = if nonzero, a warning message: */ +/* > If IWORK(3) = 1 then some of the column norms of A */ +/* > were denormalized floats. The requested high accuracy */ +/* > is not warranted by the data. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value. */ +/* > = 0: successful exit; */ +/* > > 0: SGEJSV did not converge in the maximal allowed number */ +/* > of sweeps. The computed values may be inaccurate. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEsing */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3, */ +/* > SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an */ +/* > additional row pivoting can be used as a preprocessor, which in some */ +/* > cases results in much higher accuracy. An example is matrix A with the */ +/* > structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned */ +/* > diagonal matrices and C is well-conditioned matrix. In that case, complete */ +/* > pivoting in the first QR factorizations provides accuracy dependent on the */ +/* > condition number of C, and independent of D1, D2. Such higher accuracy is */ +/* > not completely understood theoretically, but it works well in practice. */ +/* > Further, if A can be written as A = B*D, with well-conditioned B and some */ +/* > diagonal D, then the high accuracy is guaranteed, both theoretically and */ +/* > in software, independent of D. For more details see [1], [2]. */ +/* > The computational range for the singular values can be the full range */ +/* > ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS */ +/* > & LAPACK routines called by SGEJSV are implemented to work in that range. */ +/* > If that is not the case, then the restriction for safe computation with */ +/* > the singular values in the range of normalized IEEE numbers is that the */ +/* > spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not */ +/* > overflow. This code (SGEJSV) is best used in this restricted range, */ +/* > meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are */ +/* > returned as zeros. See JOBR for details on this. */ +/* > Further, this implementation is somewhat slower than the one described */ +/* > in [1,2] due to replacement of some non-LAPACK components, and because */ +/* > the choice of some tuning parameters in the iterative part (SGESVJ) is */ +/* > left to the implementer on a particular machine. */ +/* > The rank revealing QR factorization (in this code: SGEQP3) should be */ +/* > implemented as in [3]. We have a new version of SGEQP3 under development */ +/* > that is more robust than the current one in LAPACK, with a cleaner cut in */ +/* > rank deficient cases. It will be available in the SIGMA library [4]. */ +/* > If M is much larger than N, it is obvious that the initial QRF with */ +/* > column pivoting can be preprocessed by the QRF without pivoting. That */ +/* > well known trick is not used in SGEJSV because in some cases heavy row */ +/* > weighting can be treated with complete pivoting. The overhead in cases */ +/* > M much larger than N is then only due to pivoting, but the benefits in */ +/* > terms of accuracy have prevailed. The implementer/user can incorporate */ +/* > this extra QRF step easily. The implementer can also improve data movement */ +/* > (matrix transpose, matrix copy, matrix transposed copy) - this */ +/* > implementation of SGEJSV uses only the simplest, naive data movement. */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > \verbatim */ +/* > */ +/* > [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */ +/* > LAPACK Working note 169. */ +/* > [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */ +/* > LAPACK Working note 170. */ +/* > [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR */ +/* > factorization software - a case study. */ +/* > ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. */ +/* > LAPACK Working note 176. */ +/* > [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ +/* > QSVD, (H,K)-SVD computations. */ +/* > Department of Mathematics, University of Zagreb, 2008. */ +/* > \endverbatim */ + +/* > \par Bugs, examples and comments: */ +/* ================================= */ +/* > */ +/* > Please report all bugs and send interesting examples and/or comments to */ +/* > drmac@math.hr. Thank you. */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgejsv_(char *joba, char *jobu, char *jobv, char *jobr, + char *jobt, char *jobp, integer *m, integer *n, real *a, integer *lda, + real *sva, real *u, integer *ldu, real *v, integer *ldv, real *work, + integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11, i__12; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + logical defr; + real aapp, aaqq; + logical kill; + integer ierr; + real temp1; + extern real snrm2_(integer *, real *, integer *); + integer p, q; + logical jracc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + real small, entra, sfmin; + logical lsvec; + real epsln; + logical rsvec; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + integer n1; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *); + logical l2aber; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ); + real condr1, condr2, uscal1, uscal2; + logical l2kill, l2rank, l2tran; + extern /* Subroutine */ int sgeqp3_(integer *, integer *, real *, integer + *, integer *, real *, real *, integer *, integer *); + logical l2pert; + integer nr; + real scalem, sconda; + logical goscal; + real aatmin; + extern real slamch_(char *); + real aatmax; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noscal; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), sgeqrf_(integer *, integer *, real *, integer *, real *, + real *, integer *, integer *), slacpy_(char *, integer *, integer + *, real *, integer *, real *, integer *), slaset_(char *, + integer *, integer *, real *, real *, real *, integer *); + real entrat; + logical almort; + real maxprj; + extern /* Subroutine */ int spocon_(char *, integer *, real *, integer *, + real *, real *, real *, integer *, integer *); + logical errest; + extern /* Subroutine */ int sgesvj_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *, integer *), slassq_( + integer *, real *, integer *, real *, real *); + logical transp; + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *), sorgqr_(integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, integer + *), sormlq_(char *, char *, integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, real *, integer *, + integer *), sormqr_(char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + logical rowpiv; + real big, cond_ok__, xsc, big1; + integer warning, numrank; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* =========================================================================== */ + + + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sva; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + --iwork; + + /* Function Body */ + lsvec = lsame_(jobu, "U") || lsame_(jobu, "F"); + jracc = lsame_(jobv, "J"); + rsvec = lsame_(jobv, "V") || jracc; + rowpiv = lsame_(joba, "F") || lsame_(joba, "G"); + l2rank = lsame_(joba, "R"); + l2aber = lsame_(joba, "A"); + errest = lsame_(joba, "E") || lsame_(joba, "G"); + l2tran = lsame_(jobt, "T"); + l2kill = lsame_(jobr, "R"); + defr = lsame_(jobr, "N"); + l2pert = lsame_(jobp, "P"); + + if (! (rowpiv || l2rank || l2aber || errest || lsame_(joba, "C"))) { + *info = -1; + } else if (! (lsvec || lsame_(jobu, "N") || lsame_( + jobu, "W"))) { + *info = -2; + } else if (! (rsvec || lsame_(jobv, "N") || lsame_( + jobv, "W")) || jracc && ! lsvec) { + *info = -3; + } else if (! (l2kill || defr)) { + *info = -4; + } else if (! (l2tran || lsame_(jobt, "N"))) { + *info = -5; + } else if (! (l2pert || lsame_(jobp, "N"))) { + *info = -6; + } else if (*m < 0) { + *info = -7; + } else if (*n < 0 || *n > *m) { + *info = -8; + } else if (*lda < *m) { + *info = -10; + } else if (lsvec && *ldu < *m) { + *info = -13; + } else if (rsvec && *ldv < *n) { + *info = -15; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 7, i__2 = (*n << 2) + 1, i__1 = f2cmax(i__1,i__2), i__2 = (*m << + 1) + *n; +/* Computing MAX */ + i__3 = 7, i__4 = (*n << 2) + *n * *n, i__3 = f2cmax(i__3,i__4), i__4 = (* + m << 1) + *n; +/* Computing MAX */ + i__5 = 7, i__6 = (*m << 1) + *n, i__5 = f2cmax(i__5,i__6), i__6 = (*n << + 2) + 1; +/* Computing MAX */ + i__7 = 7, i__8 = (*m << 1) + *n, i__7 = f2cmax(i__7,i__8), i__8 = (*n << + 2) + 1; +/* Computing MAX */ + i__9 = (*m << 1) + *n, i__10 = *n * 6 + (*n << 1) * *n; +/* Computing MAX */ + i__11 = (*m << 1) + *n, i__12 = (*n << 2) + *n * *n, i__11 = f2cmax( + i__11,i__12), i__12 = (*n << 1) + *n * *n + 6; + if (! (lsvec || rsvec || errest) && *lwork < f2cmax(i__1,i__2) || ! ( + lsvec || rsvec) && errest && *lwork < f2cmax(i__3,i__4) || lsvec + && ! rsvec && *lwork < f2cmax(i__5,i__6) || rsvec && ! lsvec && * + lwork < f2cmax(i__7,i__8) || lsvec && rsvec && ! jracc && *lwork + < f2cmax(i__9,i__10) || lsvec && rsvec && jracc && *lwork < f2cmax( + i__11,i__12)) { + *info = -17; + } else { +/* #:) */ + *info = 0; + } + } + + if (*info != 0) { +/* #:( */ + i__1 = -(*info); + xerbla_("SGEJSV", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return for void matrix (Y3K safe) */ +/* #:) */ + if (*m == 0 || *n == 0) { + iwork[1] = 0; + iwork[2] = 0; + iwork[3] = 0; + work[1] = 0.f; + work[2] = 0.f; + work[3] = 0.f; + work[4] = 0.f; + work[5] = 0.f; + work[6] = 0.f; + work[7] = 0.f; + return 0; + } + +/* Determine whether the matrix U should be M x N or M x M */ + + if (lsvec) { + n1 = *n; + if (lsame_(jobu, "F")) { + n1 = *m; + } + } + +/* Set numerical parameters */ + +/* ! NOTE: Make sure SLAMCH() does not fail on the target architecture. */ + + epsln = slamch_("Epsilon"); + sfmin = slamch_("SafeMinimum"); + small = sfmin / epsln; + big = slamch_("O"); +/* BIG = ONE / SFMIN */ + +/* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N */ + +/* (!) If necessary, scale SVA() to protect the largest norm from */ +/* overflow. It is possible that this scaling pushes the smallest */ +/* column norm left from the underflow threshold (extreme case). */ + + scalem = 1.f / sqrt((real) (*m) * (real) (*n)); + noscal = TRUE_; + goscal = TRUE_; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.f; + aaqq = 1.f; + slassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -9; + i__2 = -(*info); + xerbla_("SGEJSV", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscal) { + sva[p] = aapp * aaqq; + } else { + noscal = FALSE_; + sva[p] = aapp * (aaqq * scalem); + if (goscal) { + goscal = FALSE_; + i__2 = p - 1; + sscal_(&i__2, &scalem, &sva[1], &c__1); + } + } +/* L1874: */ + } + + if (noscal) { + scalem = 1.f; + } + + aapp = 0.f; + aaqq = big; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { +/* Computing MAX */ + r__1 = aapp, r__2 = sva[p]; + aapp = f2cmax(r__1,r__2); + if (sva[p] != 0.f) { +/* Computing MIN */ + r__1 = aaqq, r__2 = sva[p]; + aaqq = f2cmin(r__1,r__2); + } +/* L4781: */ + } + +/* Quick return for zero M x N matrix */ +/* #:) */ + if (aapp == 0.f) { + if (lsvec) { + slaset_("G", m, &n1, &c_b34, &c_b35, &u[u_offset], ldu) + ; + } + if (rsvec) { + slaset_("G", n, n, &c_b34, &c_b35, &v[v_offset], ldv); + } + work[1] = 1.f; + work[2] = 1.f; + if (errest) { + work[3] = 1.f; + } + if (lsvec && rsvec) { + work[4] = 1.f; + work[5] = 1.f; + } + if (l2tran) { + work[6] = 0.f; + work[7] = 0.f; + } + iwork[1] = 0; + iwork[2] = 0; + iwork[3] = 0; + return 0; + } + +/* Issue warning if denormalized column norms detected. Override the */ +/* high relative accuracy request. Issue licence to kill columns */ +/* (set them to zero) whose norm is less than sigma_max / BIG (roughly). */ +/* #:( */ + warning = 0; + if (aaqq <= sfmin) { + l2rank = TRUE_; + l2kill = TRUE_; + warning = 1; + } + +/* Quick return for one-column matrix */ +/* #:) */ + if (*n == 1) { + + if (lsvec) { + slascl_("G", &c__0, &c__0, &sva[1], &scalem, m, &c__1, &a[a_dim1 + + 1], lda, &ierr); + slacpy_("A", m, &c__1, &a[a_offset], lda, &u[u_offset], ldu); +/* computing all M left singular vectors of the M x 1 matrix */ + if (n1 != *n) { + i__1 = *lwork - *n; + sgeqrf_(m, n, &u[u_offset], ldu, &work[1], &work[*n + 1], & + i__1, &ierr); + i__1 = *lwork - *n; + sorgqr_(m, &n1, &c__1, &u[u_offset], ldu, &work[1], &work[*n + + 1], &i__1, &ierr); + scopy_(m, &a[a_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } + } + if (rsvec) { + v[v_dim1 + 1] = 1.f; + } + if (sva[1] < big * scalem) { + sva[1] /= scalem; + scalem = 1.f; + } + work[1] = 1.f / scalem; + work[2] = 1.f; + if (sva[1] != 0.f) { + iwork[1] = 1; + if (sva[1] / scalem >= sfmin) { + iwork[2] = 1; + } else { + iwork[2] = 0; + } + } else { + iwork[1] = 0; + iwork[2] = 0; + } + iwork[3] = 0; + if (errest) { + work[3] = 1.f; + } + if (lsvec && rsvec) { + work[4] = 1.f; + work[5] = 1.f; + } + if (l2tran) { + work[6] = 0.f; + work[7] = 0.f; + } + return 0; + + } + + transp = FALSE_; + l2tran = l2tran && *m == *n; + + aatmax = -1.f; + aatmin = big; + if (rowpiv || l2tran) { + +/* Compute the row norms, needed to determine row pivoting sequence */ +/* (in the case of heavily row weighted A, row pivoting is strongly */ +/* advised) and to collect information needed to compare the */ +/* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). */ + + if (l2tran) { + i__1 = *m; + for (p = 1; p <= i__1; ++p) { + xsc = 0.f; + temp1 = 1.f; + slassq_(n, &a[p + a_dim1], lda, &xsc, &temp1); +/* SLASSQ gets both the ell_2 and the ell_infinity norm */ +/* in one pass through the vector */ + work[*m + *n + p] = xsc * scalem; + work[*n + p] = xsc * (scalem * sqrt(temp1)); +/* Computing MAX */ + r__1 = aatmax, r__2 = work[*n + p]; + aatmax = f2cmax(r__1,r__2); + if (work[*n + p] != 0.f) { +/* Computing MIN */ + r__1 = aatmin, r__2 = work[*n + p]; + aatmin = f2cmin(r__1,r__2); + } +/* L1950: */ + } + } else { + i__1 = *m; + for (p = 1; p <= i__1; ++p) { + work[*m + *n + p] = scalem * (r__1 = a[p + isamax_(n, &a[p + + a_dim1], lda) * a_dim1], abs(r__1)); +/* Computing MAX */ + r__1 = aatmax, r__2 = work[*m + *n + p]; + aatmax = f2cmax(r__1,r__2); +/* Computing MIN */ + r__1 = aatmin, r__2 = work[*m + *n + p]; + aatmin = f2cmin(r__1,r__2); +/* L1904: */ + } + } + + } + +/* For square matrix A try to determine whether A^t would be better */ +/* input for the preconditioned Jacobi SVD, with faster convergence. */ +/* The decision is based on an O(N) function of the vector of column */ +/* and row norms of A, based on the Shannon entropy. This should give */ +/* the right choice in most cases when the difference actually matters. */ +/* It may fail and pick the slower converging side. */ + + entra = 0.f; + entrat = 0.f; + if (l2tran) { + + xsc = 0.f; + temp1 = 1.f; + slassq_(n, &sva[1], &c__1, &xsc, &temp1); + temp1 = 1.f / temp1; + + entra = 0.f; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { +/* Computing 2nd power */ + r__1 = sva[p] / xsc; + big1 = r__1 * r__1 * temp1; + if (big1 != 0.f) { + entra += big1 * log(big1); + } +/* L1113: */ + } + entra = -entra / log((real) (*n)); + +/* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. */ +/* It is derived from the diagonal of A^t * A. Do the same with the */ +/* diagonal of A * A^t, compute the entropy of the corresponding */ +/* probability distribution. Note that A * A^t and A^t * A have the */ +/* same trace. */ + + entrat = 0.f; + i__1 = *n + *m; + for (p = *n + 1; p <= i__1; ++p) { +/* Computing 2nd power */ + r__1 = work[p] / xsc; + big1 = r__1 * r__1 * temp1; + if (big1 != 0.f) { + entrat += big1 * log(big1); + } +/* L1114: */ + } + entrat = -entrat / log((real) (*m)); + +/* Analyze the entropies and decide A or A^t. Smaller entropy */ +/* usually means better input for the algorithm. */ + + transp = entrat < entra; + +/* If A^t is better than A, transpose A. */ + + if (transp) { +/* In an optimal implementation, this trivial transpose */ +/* should be replaced with faster transpose. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + temp1 = a[q + p * a_dim1]; + a[q + p * a_dim1] = a[p + q * a_dim1]; + a[p + q * a_dim1] = temp1; +/* L1116: */ + } +/* L1115: */ + } + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + work[*m + *n + p] = sva[p]; + sva[p] = work[*n + p]; +/* L1117: */ + } + temp1 = aapp; + aapp = aatmax; + aatmax = temp1; + temp1 = aaqq; + aaqq = aatmin; + aatmin = temp1; + kill = lsvec; + lsvec = rsvec; + rsvec = kill; + if (lsvec) { + n1 = *n; + } + + rowpiv = TRUE_; + } + + } +/* END IF L2TRAN */ + +/* Scale the matrix so that its maximal singular value remains less */ +/* than SQRT(BIG) -- the matrix is scaled so that its maximal column */ +/* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep */ +/* SQRT(BIG) instead of BIG is the fact that SGEJSV uses LAPACK and */ +/* BLAS routines that, in some implementations, are not capable of */ +/* working in the full interval [SFMIN,BIG] and that they may provoke */ +/* overflows in the intermediate results. If the singular values spread */ +/* from SFMIN to BIG, then SGESVJ will compute them. So, in that case, */ +/* one should use SGESVJ instead of SGEJSV. */ + + big1 = sqrt(big); + temp1 = sqrt(big / (real) (*n)); + + slascl_("G", &c__0, &c__0, &aapp, &temp1, n, &c__1, &sva[1], n, &ierr); + if (aaqq > aapp * sfmin) { + aaqq = aaqq / aapp * temp1; + } else { + aaqq = aaqq * temp1 / aapp; + } + temp1 *= scalem; + slascl_("G", &c__0, &c__0, &aapp, &temp1, m, n, &a[a_offset], lda, &ierr); + +/* To undo scaling at the end of this procedure, multiply the */ +/* computed singular values with USCAL2 / USCAL1. */ + + uscal1 = temp1; + uscal2 = aapp; + + if (l2kill) { +/* L2KILL enforces computation of nonzero singular values in */ +/* the restricted range of condition number of the initial A, */ +/* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). */ + xsc = sqrt(sfmin); + } else { + xsc = small; + +/* Now, if the condition number of A is too big, */ +/* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, */ +/* as a precaution measure, the full SVD is computed using SGESVJ */ +/* with accumulated Jacobi rotations. This provides numerically */ +/* more robust computation, at the cost of slightly increased run */ +/* time. Depending on the concrete implementation of BLAS and LAPACK */ +/* (i.e. how they behave in presence of extreme ill-conditioning) the */ +/* implementor may decide to remove this switch. */ + if (aaqq < sqrt(sfmin) && lsvec && rsvec) { + jracc = TRUE_; + } + + } + if (aaqq < xsc) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + if (sva[p] < xsc) { + slaset_("A", m, &c__1, &c_b34, &c_b34, &a[p * a_dim1 + 1], + lda); + sva[p] = 0.f; + } +/* L700: */ + } + } + +/* Preconditioning using QR factorization with pivoting */ + + if (rowpiv) { +/* Optional row permutation (Bjoerck row pivoting): */ +/* A result by Cox and Higham shows that the Bjoerck's */ +/* row pivoting combined with standard column pivoting */ +/* has similar effect as Powell-Reid complete pivoting. */ +/* The ell-infinity norms of A are made nonincreasing. */ + i__1 = *m - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *m - p + 1; + q = isamax_(&i__2, &work[*m + *n + p], &c__1) + p - 1; + iwork[(*n << 1) + p] = q; + if (p != q) { + temp1 = work[*m + *n + p]; + work[*m + *n + p] = work[*m + *n + q]; + work[*m + *n + q] = temp1; + } +/* L1952: */ + } + i__1 = *m - 1; + slaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[(*n << 1) + 1], & + c__1); + } + +/* End of the preparation phase (scaling, optional sorting and */ +/* transposing, optional flushing of small columns). */ + +/* Preconditioning */ + +/* If the full SVD is needed, the right singular vectors are computed */ +/* from a matrix equation, and for that we need theoretical analysis */ +/* of the Businger-Golub pivoting. So we use SGEQP3 as the first RR QRF. */ +/* In all other cases the first RR QRF can be chosen by other criteria */ +/* (eg speed by replacing global with restricted window pivoting, such */ +/* as in SGEQPX from TOMS # 782). Good results will be obtained using */ +/* SGEQPX with properly (!) chosen numerical parameters. */ +/* Any improvement of SGEQP3 improves overal performance of SGEJSV. */ + +/* A * P1 = Q1 * [ R1^t 0]^t: */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = 0; +/* L1963: */ + } + i__1 = *lwork - *n; + sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], & + i__1, &ierr); + +/* The upper triangular matrix R1 from the first QRF is inspected for */ +/* rank deficiency and possibilities for deflation, or possible */ +/* ill-conditioning. Depending on the user specified flag L2RANK, */ +/* the procedure explores possibilities to reduce the numerical */ +/* rank by inspecting the computed upper triangular factor. If */ +/* L2RANK or L2ABER are up, then SGEJSV will compute the SVD of */ +/* A + dA, where ||dA|| <= f(M,N)*EPSLN. */ + + nr = 1; + if (l2aber) { +/* Standard absolute error bound suffices. All sigma_i with */ +/* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an */ +/* aggressive enforcement of lower numerical rank by introducing a */ +/* backward error of the order of N*EPSLN*||A||. */ + temp1 = sqrt((real) (*n)) * epsln; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((r__2 = a[p + p * a_dim1], abs(r__2)) >= temp1 * (r__1 = a[ + a_dim1 + 1], abs(r__1))) { + ++nr; + } else { + goto L3002; + } +/* L3001: */ + } +L3002: + ; + } else if (l2rank) { +/* Sudden drop on the diagonal of R1 is used as the criterion for */ +/* close-to-rank-deficient. */ + temp1 = sqrt(sfmin); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((r__2 = a[p + p * a_dim1], abs(r__2)) < epsln * (r__1 = a[p - + 1 + (p - 1) * a_dim1], abs(r__1)) || (r__3 = a[p + p * + a_dim1], abs(r__3)) < small || l2kill && (r__4 = a[p + p * + a_dim1], abs(r__4)) < temp1) { + goto L3402; + } + ++nr; +/* L3401: */ + } +L3402: + + ; + } else { +/* The goal is high relative accuracy. However, if the matrix */ +/* has high scaled condition number the relative accuracy is in */ +/* general not feasible. Later on, a condition number estimator */ +/* will be deployed to estimate the scaled condition number. */ +/* Here we just remove the underflowed part of the triangular */ +/* factor. This prevents the situation in which the code is */ +/* working hard to get the accuracy not warranted by the data. */ + temp1 = sqrt(sfmin); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((r__1 = a[p + p * a_dim1], abs(r__1)) < small || l2kill && ( + r__2 = a[p + p * a_dim1], abs(r__2)) < temp1) { + goto L3302; + } + ++nr; +/* L3301: */ + } +L3302: + + ; + } + + almort = FALSE_; + if (nr == *n) { + maxprj = 1.f; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + temp1 = (r__1 = a[p + p * a_dim1], abs(r__1)) / sva[iwork[p]]; + maxprj = f2cmin(maxprj,temp1); +/* L3051: */ + } +/* Computing 2nd power */ + r__1 = maxprj; + if (r__1 * r__1 >= 1.f - (real) (*n) * epsln) { + almort = TRUE_; + } + } + + + sconda = -1.f; + condr1 = -1.f; + condr2 = -1.f; + + if (errest) { + if (*n == nr) { + if (rsvec) { + slacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + r__1 = 1.f / temp1; + sscal_(&p, &r__1, &v[p * v_dim1 + 1], &c__1); +/* L3053: */ + } + spocon_("U", n, &v[v_offset], ldv, &c_b35, &temp1, &work[*n + + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } else if (lsvec) { + slacpy_("U", n, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + r__1 = 1.f / temp1; + sscal_(&p, &r__1, &u[p * u_dim1 + 1], &c__1); +/* L3054: */ + } + spocon_("U", n, &u[u_offset], ldu, &c_b35, &temp1, &work[*n + + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } else { + slacpy_("U", n, n, &a[a_offset], lda, &work[*n + 1], n); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = sva[iwork[p]]; + r__1 = 1.f / temp1; + sscal_(&p, &r__1, &work[*n + (p - 1) * *n + 1], &c__1); +/* L3052: */ + } + spocon_("U", n, &work[*n + 1], n, &c_b35, &temp1, &work[*n + * + n * *n + 1], &iwork[(*n << 1) + *m + 1], &ierr); + } + sconda = 1.f / sqrt(temp1); +/* SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1). */ +/* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ + } else { + sconda = -1.f; + } + } + + l2pert = l2pert && (r__1 = a[a_dim1 + 1] / a[nr + nr * a_dim1], abs(r__1)) + > sqrt(big1); +/* If there is no violent scaling, artificial perturbation is not needed. */ + +/* Phase 3: */ + + if (! (rsvec || lsvec)) { + +/* Singular Values only */ + +/* Computing MIN */ + i__2 = *n - 1; + i__1 = f2cmin(i__2,nr); + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p; + scopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * + a_dim1], &c__1); +/* L1946: */ + } + +/* The following two DO-loops introduce small relative perturbation */ +/* into the strict upper triangle of the lower triangular matrix. */ +/* Small entries below the main diagonal are also changed. */ +/* This modification is useful if the computing environment does not */ +/* provide/allow FLUSH TO ZERO underflow, for it prevents many */ +/* annoying denormalized numbers in case of strongly scaled matrices. */ +/* The perturbation is structured so that it does not introduce any */ +/* new perturbation of the singular values, and it does not destroy */ +/* the job done by the preconditioner. */ +/* The licence for this perturbation is in the variable L2PERT, which */ +/* should be .FALSE. if FLUSH TO ZERO underflow is active. */ + + if (! almort) { + + if (l2pert) { +/* XSC = SQRT(SMALL) */ + xsc = epsln / (real) (*n); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (r__1 = a[q + q * a_dim1], abs(r__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (r__1 = a[p + q * a_dim1], abs(r__1)) <= + temp1 || p < q) { + a[p + q * a_dim1] = r_sign(&temp1, &a[p + q * + a_dim1]); + } +/* L4949: */ + } +/* L4947: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + + 1], lda); + } + + + i__1 = *lwork - *n; + sgeqrf_(n, &nr, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, + &ierr); + + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + scopy_(&i__2, &a[p + (p + 1) * a_dim1], lda, &a[p + 1 + p * + a_dim1], &c__1); +/* L1948: */ + } + + } + +/* Row-cyclic Jacobi SVD algorithm with column pivoting */ + +/* to drown denormals */ + if (l2pert) { +/* XSC = SQRT(SMALL) */ + xsc = epsln / (real) (*n); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (r__1 = a[q + q * a_dim1], abs(r__1)); + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + if (p > q && (r__1 = a[p + q * a_dim1], abs(r__1)) <= + temp1 || p < q) { + a[p + q * a_dim1] = r_sign(&temp1, &a[p + q * a_dim1]) + ; + } +/* L1949: */ + } +/* L1947: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &a[(a_dim1 << 1) + 1], + lda); + } + +/* triangular matrix (plus perturbation which is ignored in */ +/* the part which destroys triangular form (confusing?!)) */ + + sgesvj_("L", "NoU", "NoV", &nr, &nr, &a[a_offset], lda, &sva[1], n, & + v[v_offset], ldv, &work[1], lwork, info); + + scalem = work[1]; + numrank = i_nint(&work[2]); + + + } else if (rsvec && ! lsvec) { + +/* -> Singular Values and Right Singular Vectors <- */ + + if (almort) { + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + scopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & + c__1); +/* L1998: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + + sgesvj_("L", "U", "N", n, &nr, &v[v_offset], ldv, &sva[1], &nr, & + a[a_offset], lda, &work[1], lwork, info); + scalem = work[1]; + numrank = i_nint(&work[2]); + } else { + +/* accumulated product of Jacobi rotations, three are perfect ) */ + + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &a[a_dim1 + 2], + lda); + i__1 = *lwork - *n; + sgelqf_(&nr, n, &a[a_offset], lda, &work[1], &work[*n + 1], &i__1, + &ierr); + slacpy_("Lower", &nr, &nr, &a[a_offset], lda, &v[v_offset], ldv); + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + i__1 = *lwork - (*n << 1); + sgeqrf_(&nr, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << + 1) + 1], &i__1, &ierr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + scopy_(&i__2, &v[p + p * v_dim1], ldv, &v[p + p * v_dim1], & + c__1); +/* L8998: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + + i__1 = *lwork - *n; + sgesvj_("Lower", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[1], & + nr, &u[u_offset], ldu, &work[*n + 1], &i__1, info); + scalem = work[*n + 1]; + numrank = i_nint(&work[*n + 2]); + if (nr < *n) { + i__1 = *n - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], + ldv); + i__1 = *n - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + + 1) * v_dim1], ldv); + } + + i__1 = *lwork - *n; + sormlq_("Left", "Transpose", n, n, &nr, &a[a_offset], lda, &work[ + 1], &v[v_offset], ldv, &work[*n + 1], &i__1, &ierr); + + } + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + scopy_(n, &v[p + v_dim1], ldv, &a[iwork[p] + a_dim1], lda); +/* L8991: */ + } + slacpy_("All", n, n, &a[a_offset], lda, &v[v_offset], ldv); + + if (transp) { + slacpy_("All", n, n, &v[v_offset], ldv, &u[u_offset], ldu); + } + + } else if (lsvec && ! rsvec) { + + +/* Jacobi rotations in the Jacobi iterations. */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + scopy_(&i__2, &a[p + p * a_dim1], lda, &u[p + p * u_dim1], &c__1); +/* L1965: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], + ldu); + + i__1 = *lwork - (*n << 1); + sgeqrf_(n, &nr, &u[u_offset], ldu, &work[*n + 1], &work[(*n << 1) + 1] + , &i__1, &ierr); + + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + scopy_(&i__2, &u[p + (p + 1) * u_dim1], ldu, &u[p + 1 + p * + u_dim1], &c__1); +/* L1967: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("Upper", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + 1], + ldu); + + i__1 = *lwork - *n; + sgesvj_("Lower", "U", "N", &nr, &nr, &u[u_offset], ldu, &sva[1], &nr, + &a[a_offset], lda, &work[*n + 1], &i__1, info); + scalem = work[*n + 1]; + numrank = i_nint(&work[*n + 2]); + + if (nr < *m) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * u_dim1 + + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + (nr + + 1) * u_dim1], ldu); + } + } + + i__1 = *lwork - *n; + sormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], &u[ + u_offset], ldu, &work[*n + 1], &i__1, &ierr); + + if (rowpiv) { + i__1 = *m - 1; + slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + + 1], &c_n1); + } + + i__1 = n1; + for (p = 1; p <= i__1; ++p) { + xsc = 1.f / snrm2_(m, &u[p * u_dim1 + 1], &c__1); + sscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); +/* L1974: */ + } + + if (transp) { + slacpy_("All", n, n, &u[u_offset], ldu, &v[v_offset], ldv); + } + + } else { + + + if (! jracc) { + + if (! almort) { + +/* Second Preconditioning Step (QRF [with pivoting]) */ +/* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is */ +/* equivalent to an LQF CALL. Since in many libraries the QRF */ +/* seems to be better optimized than the LQF, we do explicit */ +/* transpose and use the QRF. This is subject to changes in an */ +/* optimized implementation of SGEJSV. */ + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + scopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], + &c__1); +/* L1968: */ + } + +/* denormals in the second QR factorization, where they are */ +/* as good as zeros. This is done to avoid painfully slow */ +/* computation with denormals. The relative size of the perturbation */ +/* is a parameter that can be changed by the implementer. */ +/* This perturbation device will be obsolete on machines with */ +/* properly implemented arithmetic. */ +/* To switch it off, set L2PERT=.FALSE. To remove it from the */ +/* code, remove the action under L2PERT=.TRUE., leave the ELSE part. */ +/* The following two loops should be blocked and fused with the */ +/* transposed copy above. */ + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (r__1 = v[q + q * v_dim1], abs(r__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (r__1 = v[p + q * v_dim1], abs(r__1)) + <= temp1 || p < q) { + v[p + q * v_dim1] = r_sign(&temp1, &v[p + q * + v_dim1]); + } + if (p < q) { + v[p + q * v_dim1] = -v[p + q * v_dim1]; + } +/* L2968: */ + } +/* L2969: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* Estimate the row scaled condition number of R1 */ +/* (If R1 is rectangular, N > NR, then the condition number */ +/* of the leading NR x NR submatrix is estimated.) */ + + slacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1] + , &nr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + temp1 = snrm2_(&i__2, &work[(*n << 1) + (p - 1) * nr + p], + &c__1); + i__2 = nr - p + 1; + r__1 = 1.f / temp1; + sscal_(&i__2, &r__1, &work[(*n << 1) + (p - 1) * nr + p], + &c__1); +/* L3950: */ + } + spocon_("Lower", &nr, &work[(*n << 1) + 1], &nr, &c_b35, & + temp1, &work[(*n << 1) + nr * nr + 1], &iwork[*m + (* + n << 1) + 1], &ierr); + condr1 = 1.f / sqrt(temp1); +/* R1 is OK for inverse <=> CONDR1 .LT. FLOAT(N) */ +/* more conservative <=> CONDR1 .LT. SQRT(FLOAT(N)) */ + + cond_ok__ = sqrt((real) nr); +/* [TP] COND_OK is a tuning parameter. */ + if (condr1 < cond_ok__) { +/* implementation, this QRF should be implemented as the QRF */ +/* of a lower triangular matrix. */ +/* R1^t = Q2 * R2 */ + i__1 = *lwork - (*n << 1); + sgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(* + n << 1) + 1], &i__1, &ierr); + + if (l2pert) { + xsc = sqrt(small) / epsln; + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + r__3 = (r__1 = v[p + p * v_dim1], abs(r__1)), + r__4 = (r__2 = v[q + q * v_dim1], abs( + r__2)); + temp1 = xsc * f2cmin(r__3,r__4); + if ((r__1 = v[q + p * v_dim1], abs(r__1)) <= + temp1) { + v[q + p * v_dim1] = r_sign(&temp1, &v[q + + p * v_dim1]); + } +/* L3958: */ + } +/* L3959: */ + } + } + + if (nr != *n) { + slacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << + 1) + 1], n); + } + + i__1 = nr - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p; + scopy_(&i__2, &v[p + (p + 1) * v_dim1], ldv, &v[p + 1 + + p * v_dim1], &c__1); +/* L1969: */ + } + + condr2 = condr1; + + } else { + +/* Note that windowed pivoting would be equally good */ +/* numerically, and more run-time efficient. So, in */ +/* an optimal implementation, the next call to SGEQP3 */ +/* should be replaced with eg. CALL SGEQPX (ACM TOMS #782) */ +/* with properly (carefully) chosen parameters. */ + +/* R1^t * P2 = Q2 * R2 */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + iwork[*n + p] = 0; +/* L3003: */ + } + i__1 = *lwork - (*n << 1); + sgeqp3_(n, &nr, &v[v_offset], ldv, &iwork[*n + 1], &work[* + n + 1], &work[(*n << 1) + 1], &i__1, &ierr); +/* * CALL SGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), */ +/* * $ LWORK-2*N, IERR ) */ + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + r__3 = (r__1 = v[p + p * v_dim1], abs(r__1)), + r__4 = (r__2 = v[q + q * v_dim1], abs( + r__2)); + temp1 = xsc * f2cmin(r__3,r__4); + if ((r__1 = v[q + p * v_dim1], abs(r__1)) <= + temp1) { + v[q + p * v_dim1] = r_sign(&temp1, &v[q + + p * v_dim1]); + } +/* L3968: */ + } +/* L3969: */ + } + } + + slacpy_("A", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + + 1], n); + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (p = 2; p <= i__1; ++p) { + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { +/* Computing MIN */ + r__3 = (r__1 = v[p + p * v_dim1], abs(r__1)), + r__4 = (r__2 = v[q + q * v_dim1], abs( + r__2)); + temp1 = xsc * f2cmin(r__3,r__4); + v[p + q * v_dim1] = -r_sign(&temp1, &v[q + p * + v_dim1]); +/* L8971: */ + } +/* L8970: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("L", &i__1, &i__2, &c_b34, &c_b34, &v[v_dim1 + + 2], ldv); + } +/* Now, compute R2 = L3 * Q3, the LQ factorization. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sgelqf_(&nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + *n + * nr + 1], &work[(*n << 1) + *n * nr + nr + 1], & + i__1, &ierr); + slacpy_("L", &nr, &nr, &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &nr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + temp1 = snrm2_(&p, &work[(*n << 1) + *n * nr + nr + p] + , &nr); + r__1 = 1.f / temp1; + sscal_(&p, &r__1, &work[(*n << 1) + *n * nr + nr + p], + &nr); +/* L4950: */ + } + spocon_("L", &nr, &work[(*n << 1) + *n * nr + nr + 1], & + nr, &c_b35, &temp1, &work[(*n << 1) + *n * nr + + nr + nr * nr + 1], &iwork[*m + (*n << 1) + 1], & + ierr); + condr2 = 1.f / sqrt(temp1); + + if (condr2 >= cond_ok__) { +/* (this overwrites the copy of R2, as it will not be */ +/* needed in this branch, but it does not overwritte the */ +/* Huseholder vectors of Q2.). */ + slacpy_("U", &nr, &nr, &v[v_offset], ldv, &work[(*n << + 1) + 1], n); +/* WORK(2*N+N*NR+1:2*N+N*NR+N) */ + } + + } + + if (l2pert) { + xsc = sqrt(small); + i__1 = nr; + for (q = 2; q <= i__1; ++q) { + temp1 = xsc * v[q + q * v_dim1]; + i__2 = q - 1; + for (p = 1; p <= i__2; ++p) { +/* V(p,q) = - SIGN( TEMP1, V(q,p) ) */ + v[p + q * v_dim1] = -r_sign(&temp1, &v[p + q * + v_dim1]); +/* L4969: */ + } +/* L4968: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* Second preconditioning finished; continue with Jacobi SVD */ +/* The input matrix is lower trinagular. */ + +/* Recover the right singular vectors as solution of a well */ +/* conditioned triangular matrix equation. */ + + if (condr1 < cond_ok__) { + + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_nint(&work[(*n << 1) + *n * nr + nr + 2]); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + scopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + + 1], &c__1); + sscal_(&nr, &sva[p], &v[p * v_dim1 + 1], &c__1); +/* L3970: */ + } + + if (nr == *n) { +/* :)) .. best case, R1 is inverted. The solution of this matrix */ +/* equation is Q2*V2 = the product of the Jacobi rotations */ +/* used in SGESVJ, premultiplied with the orthogonal matrix */ +/* from the second QR factorization. */ + strsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &a[ + a_offset], lda, &v[v_offset], ldv); + } else { +/* is inverted to get the product of the Jacobi rotations */ +/* used in SGESVJ. The Q-factor from the second QR */ +/* factorization is then built in explicitly. */ + strsm_("L", "U", "T", "N", &nr, &nr, &c_b35, &work[(* + n << 1) + 1], n, &v[v_offset], ldv); + if (nr < *n) { + i__1 = *n - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + + 1 + v_dim1], ldv); + i__1 = *n - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + + 1) * v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + + 1 + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, + &work[*n + 1], &v[v_offset], ldv, &work[(*n << + 1) + *n * nr + nr + 1], &i__1, &ierr); + } + + } else if (condr2 < cond_ok__) { + +/* :) .. the input matrix A is very likely a relative of */ +/* the Kahan matrix :) */ +/* The matrix R2 is inverted. The solution of the matrix equation */ +/* is Q3^T*V3 = the product of the Jacobi rotations (appplied to */ +/* the lower triangular L3 from the LQ factorization of */ +/* R2=L3*Q3), pre-multiplied with the transposed Q3. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sgesvj_("L", "U", "N", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_nint(&work[(*n << 1) + *n * nr + nr + 2]); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + scopy_(&nr, &v[p * v_dim1 + 1], &c__1, &u[p * u_dim1 + + 1], &c__1); + sscal_(&nr, &sva[p], &u[p * u_dim1 + 1], &c__1); +/* L3870: */ + } + strsm_("L", "U", "N", "N", &nr, &nr, &c_b35, &work[(*n << + 1) + 1], n, &u[u_offset], ldu); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = + u[p + q * u_dim1]; +/* L872: */ + } + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + + p]; +/* L874: */ + } +/* L873: */ + } + if (nr < *n) { + i__1 = *n - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & + work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &i__1, &ierr); + } else { +/* Last line of defense. */ +/* #:( This is a rather pathological case: no scaled condition */ +/* improvement after two pivoted QR factorizations. Other */ +/* possibility is that the rank revealing QR factorization */ +/* or the condition estimator has failed, or the COND_OK */ +/* is set very close to ONE (which is unnecessary). Normally, */ +/* this branch should never be executed, but in rare cases of */ +/* failure of the RRQR or condition estimator, the last line of */ +/* defense ensures that SGEJSV completes the task. */ +/* Compute the full SVD of L3 using SGESVJ with explicit */ +/* accumulation of Jacobi rotations. */ + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sgesvj_("L", "U", "V", &nr, &nr, &v[v_offset], ldv, &sva[ + 1], &nr, &u[u_offset], ldu, &work[(*n << 1) + *n * + nr + nr + 1], &i__1, info); + scalem = work[(*n << 1) + *n * nr + nr + 1]; + numrank = i_nint(&work[(*n << 1) + *n * nr + nr + 2]); + if (nr < *n) { + i__1 = *n - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + + (nr + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, & + work[*n + 1], &v[v_offset], ldv, &work[(*n << 1) + + *n * nr + nr + 1], &i__1, &ierr); + + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sormlq_("L", "T", &nr, &nr, &nr, &work[(*n << 1) + 1], n, + &work[(*n << 1) + *n * nr + 1], &u[u_offset], ldu, + &work[(*n << 1) + *n * nr + nr + 1], &i__1, & + ierr); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[*n + p]] = + u[p + q * u_dim1]; +/* L772: */ + } + i__2 = nr; + for (p = 1; p <= i__2; ++p) { + u[p + q * u_dim1] = work[(*n << 1) + *n * nr + nr + + p]; +/* L774: */ + } +/* L773: */ + } + + } + +/* Permute the rows of V using the (column) permutation from the */ +/* first QRF. Also, scale the columns to make them unit in */ +/* Euclidean norm. This applies to all cases. */ + + temp1 = sqrt((real) (*n)) * epsln; + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * + v_dim1]; +/* L972: */ + } + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p] + ; +/* L973: */ + } + xsc = 1.f / snrm2_(n, &v[q * v_dim1 + 1], &c__1); + if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) { + sscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); + } +/* L1972: */ + } +/* At this moment, V contains the right singular vectors of A. */ +/* Next, assemble the left singular vector matrix U (M x N). */ + if (nr < *m) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + + (nr + 1) * u_dim1], ldu); + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* matrix U. This applies to all cases. */ + + i__1 = *lwork - *n; + sormqr_("Left", "No_Tr", m, &n1, n, &a[a_offset], lda, &work[ + 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); +/* The columns of U are normalized. The cost is O(M*N) flops. */ + temp1 = sqrt((real) (*m)) * epsln; + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + xsc = 1.f / snrm2_(m, &u[p * u_dim1 + 1], &c__1); + if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) { + sscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); + } +/* L1973: */ + } + +/* If the initial QRF is computed with row pivoting, the left */ +/* singular vectors must be adjusted. */ + + if (rowpiv) { + i__1 = *m - 1; + slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n + << 1) + 1], &c_n1); + } + + } else { + +/* the second QRF is not needed */ + + slacpy_("Upper", n, n, &a[a_offset], lda, &work[*n + 1], n); + if (l2pert) { + xsc = sqrt(small); + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + temp1 = xsc * work[*n + (p - 1) * *n + p]; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + work[*n + (q - 1) * *n + p] = -r_sign(&temp1, & + work[*n + (p - 1) * *n + q]); +/* L5971: */ + } +/* L5970: */ + } + } else { + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("Lower", &i__1, &i__2, &c_b34, &c_b34, &work[*n + + 2], n); + } + + i__1 = *lwork - *n - *n * *n; + sgesvj_("Upper", "U", "N", n, n, &work[*n + 1], n, &sva[1], n, + &u[u_offset], ldu, &work[*n + *n * *n + 1], &i__1, + info); + + scalem = work[*n + *n * *n + 1]; + numrank = i_nint(&work[*n + *n * *n + 2]); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + scopy_(n, &work[*n + (p - 1) * *n + 1], &c__1, &u[p * + u_dim1 + 1], &c__1); + sscal_(n, &sva[p], &work[*n + (p - 1) * *n + 1], &c__1); +/* L6970: */ + } + + strsm_("Left", "Upper", "NoTrans", "No UD", n, n, &c_b35, &a[ + a_offset], lda, &work[*n + 1], n); + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + scopy_(n, &work[*n + p], n, &v[iwork[p] + v_dim1], ldv); +/* L6972: */ + } + temp1 = sqrt((real) (*n)) * epsln; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + xsc = 1.f / snrm2_(n, &v[p * v_dim1 + 1], &c__1); + if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) { + sscal_(n, &xsc, &v[p * v_dim1 + 1], &c__1); + } +/* L6971: */ + } + +/* Assemble the left singular vector matrix U (M x N). */ + + if (*n < *m) { + i__1 = *m - *n; + slaset_("A", &i__1, n, &c_b34, &c_b34, &u[*n + 1 + u_dim1] + , ldu); + if (*n < n1) { + i__1 = n1 - *n; + slaset_("A", n, &i__1, &c_b34, &c_b34, &u[(*n + 1) * + u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[*n + 1 + + (*n + 1) * u_dim1], ldu); + } + } + i__1 = *lwork - *n; + sormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[ + 1], &u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); + temp1 = sqrt((real) (*m)) * epsln; + i__1 = n1; + for (p = 1; p <= i__1; ++p) { + xsc = 1.f / snrm2_(m, &u[p * u_dim1 + 1], &c__1); + if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) { + sscal_(m, &xsc, &u[p * u_dim1 + 1], &c__1); + } +/* L6973: */ + } + + if (rowpiv) { + i__1 = *m - 1; + slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n + << 1) + 1], &c_n1); + } + + } + +/* end of the >> almost orthogonal case << in the full SVD */ + + } else { + +/* This branch deploys a preconditioned Jacobi SVD with explicitly */ +/* accumulated rotations. It is included as optional, mainly for */ +/* experimental purposes. It does perform well, and can also be used. */ +/* In this implementation, this branch will be automatically activated */ +/* if the condition number sigma_max(A) / sigma_min(A) is predicted */ +/* to be greater than the overflow threshold. This is because the */ +/* a posteriori computation of the singular vectors assumes robust */ +/* implementation of BLAS and some LAPACK procedures, capable of working */ +/* in presence of extreme values. Since that is not always the case, ... */ + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + scopy_(&i__2, &a[p + p * a_dim1], lda, &v[p + p * v_dim1], & + c__1); +/* L7968: */ + } + + if (l2pert) { + xsc = sqrt(small / epsln); + i__1 = nr; + for (q = 1; q <= i__1; ++q) { + temp1 = xsc * (r__1 = v[q + q * v_dim1], abs(r__1)); + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + if (p > q && (r__1 = v[p + q * v_dim1], abs(r__1)) <= + temp1 || p < q) { + v[p + q * v_dim1] = r_sign(&temp1, &v[p + q * + v_dim1]); + } + if (p < q) { + v[p + q * v_dim1] = -v[p + q * v_dim1]; + } +/* L5968: */ + } +/* L5969: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &v[(v_dim1 << 1) + + 1], ldv); + } + i__1 = *lwork - (*n << 1); + sgeqrf_(n, &nr, &v[v_offset], ldv, &work[*n + 1], &work[(*n << 1) + + 1], &i__1, &ierr); + slacpy_("L", n, &nr, &v[v_offset], ldv, &work[(*n << 1) + 1], n); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr - p + 1; + scopy_(&i__2, &v[p + p * v_dim1], ldv, &u[p + p * u_dim1], & + c__1); +/* L7969: */ + } + if (l2pert) { + xsc = sqrt(small / epsln); + i__1 = nr; + for (q = 2; q <= i__1; ++q) { + i__2 = q - 1; + for (p = 1; p <= i__2; ++p) { +/* Computing MIN */ + r__3 = (r__1 = u[p + p * u_dim1], abs(r__1)), r__4 = ( + r__2 = u[q + q * u_dim1], abs(r__2)); + temp1 = xsc * f2cmin(r__3,r__4); + u[p + q * u_dim1] = -r_sign(&temp1, &u[q + p * u_dim1] + ); +/* L9971: */ + } +/* L9970: */ + } + } else { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b34, &c_b34, &u[(u_dim1 << 1) + + 1], ldu); + } + i__1 = *lwork - (*n << 1) - *n * nr; + sgesvj_("L", "U", "V", &nr, &nr, &u[u_offset], ldu, &sva[1], n, & + v[v_offset], ldv, &work[(*n << 1) + *n * nr + 1], &i__1, + info); + scalem = work[(*n << 1) + *n * nr + 1]; + numrank = i_nint(&work[(*n << 1) + *n * nr + 2]); + if (nr < *n) { + i__1 = *n - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &v[nr + 1 + v_dim1], + ldv); + i__1 = *n - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &v[nr + 1 + (nr + + 1) * v_dim1], ldv); + } + i__1 = *lwork - (*n << 1) - *n * nr - nr; + sormqr_("L", "N", n, n, &nr, &work[(*n << 1) + 1], n, &work[*n + + 1], &v[v_offset], ldv, &work[(*n << 1) + *n * nr + nr + 1] + , &i__1, &ierr); + +/* Permute the rows of V using the (column) permutation from the */ +/* first QRF. Also, scale the columns to make them unit in */ +/* Euclidean norm. This applies to all cases. */ + + temp1 = sqrt((real) (*n)) * epsln; + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + work[(*n << 1) + *n * nr + nr + iwork[p]] = v[p + q * + v_dim1]; +/* L8972: */ + } + i__2 = *n; + for (p = 1; p <= i__2; ++p) { + v[p + q * v_dim1] = work[(*n << 1) + *n * nr + nr + p]; +/* L8973: */ + } + xsc = 1.f / snrm2_(n, &v[q * v_dim1 + 1], &c__1); + if (xsc < 1.f - temp1 || xsc > temp1 + 1.f) { + sscal_(n, &xsc, &v[q * v_dim1 + 1], &c__1); + } +/* L7972: */ + } + +/* At this moment, V contains the right singular vectors of A. */ +/* Next, assemble the left singular vector matrix U (M x N). */ + + if (nr < *m) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b34, &c_b34, &u[nr + 1 + u_dim1], + ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b34, &c_b34, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b34, &c_b35, &u[nr + 1 + ( + nr + 1) * u_dim1], ldu); + } + } + + i__1 = *lwork - *n; + sormqr_("Left", "No Tr", m, &n1, n, &a[a_offset], lda, &work[1], & + u[u_offset], ldu, &work[*n + 1], &i__1, &ierr); + + if (rowpiv) { + i__1 = *m - 1; + slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[(*n << 1) + + 1], &c_n1); + } + + + } + if (transp) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + sswap_(n, &u[p * u_dim1 + 1], &c__1, &v[p * v_dim1 + 1], & + c__1); +/* L6974: */ + } + } + + } +/* end of the full SVD */ + +/* Undo scaling, if necessary (and possible) */ + + if (uscal2 <= big / sva[1] * uscal1) { + slascl_("G", &c__0, &c__0, &uscal1, &uscal2, &nr, &c__1, &sva[1], n, & + ierr); + uscal1 = 1.f; + uscal2 = 1.f; + } + + if (nr < *n) { + i__1 = *n; + for (p = nr + 1; p <= i__1; ++p) { + sva[p] = 0.f; +/* L3004: */ + } + } + + work[1] = uscal2 * scalem; + work[2] = uscal1; + if (errest) { + work[3] = sconda; + } + if (lsvec && rsvec) { + work[4] = condr1; + work[5] = condr2; + } + if (l2tran) { + work[6] = entra; + work[7] = entrat; + } + + iwork[1] = nr; + iwork[2] = numrank; + iwork[3] = warning; + + return 0; +} /* sgejsv_ */ + diff --git a/lapack-netlib/SRC/sgelq.c b/lapack-netlib/SRC/sgelq.c new file mode 100644 index 000000000..a135a90ee --- /dev/null +++ b/lapack-netlib/SRC/sgelq.c @@ -0,0 +1,742 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGELQ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N, TSIZE, LWORK */ +/* REAL A( LDA, * ), T( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELQ computes an LQ factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a N-by-N orthogonal matrix; */ +/* > L is a lower-triangular M-by-M matrix; */ +/* > 0 is a M-by-(N-M) zero matrix, if M < N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the M-by-f2cmin(M,N) lower trapezoidal matrix L */ +/* > (L is lower triangular if M <= N); */ +/* > the elements above the diagonal are used to store part of the */ +/* > data structure to represent Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (MAX(5,TSIZE)) */ +/* > On exit, if INFO = 0, T(1) returns optimal (or either minimal */ +/* > or optimal, if query is assumed) TSIZE. See TSIZE for details. */ +/* > Remaining T contains part of the data structure used to represent Q. */ +/* > If one wants to apply or construct Q, then one needs to keep T */ +/* > (in addition to A) and pass it to further subroutines. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > If TSIZE >= 5, the dimension of the array T. */ +/* > If TSIZE = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If TSIZE = -1, the routine calculates optimal size of T for the */ +/* > optimum performance and returns this value in T(1). */ +/* > If TSIZE = -2, the routine calculates minimal size of T and */ +/* > returns this value in T(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */ +/* > or optimal, if query was assumed) LWORK. */ +/* > See LWORK for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If LWORK = -1, the routine calculates optimal size of WORK for the */ +/* > optimal performance and returns this value in WORK(1). */ +/* > If LWORK = -2, the routine calculates minimal size of WORK and */ +/* > returns this value in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The goal of the interface is to give maximum freedom to the developers for */ +/* > creating any LQ factorization algorithm they wish. The triangular */ +/* > (trapezoidal) L has to be stored in the lower part of A. The lower part of A */ +/* > and the array T can be used to store any relevant information for applying or */ +/* > constructing the Q factor. The WORK array can safely be discarded after exit. */ +/* > */ +/* > Caution: One should not expect the sizes of T and WORK to be the same from one */ +/* > LAPACK implementation to the other, or even from one execution to the other. */ +/* > A workspace query (for T and WORK) is needed at each execution. However, */ +/* > for a given execution, the size of T and WORK are fixed and will not change */ +/* > from one query to the next. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* > \par Further Details particular to this LAPACK implementation: */ +/* ============================================================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > SLASWLQ or SGELQT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, SGELQ will use either */ +/* > SLASWLQ (if the matrix is short-and-wide) or SGELQT to compute */ +/* > the LQ factorization. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgelq_(integer *m, integer *n, real *a, integer *lda, + real *t, integer *tsize, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + logical mint, minw; + integer lwmin, lwreq, lwopt, mb, nb, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sgelqt_(integer *, integer *, integer *, real + *, integer *, real *, integer *, real *, integer *); + logical lminws, lquery; + integer mintsz; + extern /* Subroutine */ int slaswlq_(integer *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , integer *); + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + --work; + + /* Function Body */ + *info = 0; + + lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2; + + mint = FALSE_; + minw = FALSE_; + if (*tsize == -2 || *lwork == -2) { + if (*tsize != -1) { + mint = TRUE_; + } + if (*lwork != -1) { + minw = TRUE_; + } + } + +/* Determine the block size */ + + if (f2cmin(*m,*n) > 0) { + mb = ilaenv_(&c__1, "SGELQ ", " ", m, n, &c__1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb = ilaenv_(&c__1, "SGELQ ", " ", m, n, &c__2, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + mb = 1; + nb = *n; + } + if (mb > f2cmin(*m,*n) || mb < 1) { + mb = 1; + } + if (nb > *n || nb <= *m) { + nb = *n; + } + mintsz = *m + 5; + if (nb > *m && *n > *m) { + if ((*n - *m) % (nb - *m) == 0) { + nblcks = (*n - *m) / (nb - *m); + } else { + nblcks = (*n - *m) / (nb - *m) + 1; + } + } else { + nblcks = 1; + } + +/* Determine if the workspace size satisfies minimal size */ + + if (*n <= *m || nb <= *m || nb >= *n) { + lwmin = f2cmax(1,*n); +/* Computing MAX */ + i__1 = 1, i__2 = mb * *n; + lwopt = f2cmax(i__1,i__2); + } else { + lwmin = f2cmax(1,*m); +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m; + lwopt = f2cmax(i__1,i__2); + } + lminws = FALSE_; +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m * nblcks + 5; + if ((*tsize < f2cmax(i__1,i__2) || *lwork < lwopt) && *lwork >= lwmin && * + tsize >= mintsz && ! lquery) { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2)) { + lminws = TRUE_; + mb = 1; + nb = *n; + } + if (*lwork < lwopt) { + lminws = TRUE_; + mb = 1; + } + } + if (*n <= *m || nb <= *m || nb >= *n) { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *n; + lwreq = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m; + lwreq = f2cmax(i__1,i__2); + } + + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mb * *m * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) { + *info = -6; + } else if (*lwork < lwreq && ! lquery && ! lminws) { + *info = -8; + } + } + + if (*info == 0) { + if (mint) { + t[1] = (real) mintsz; + } else { + t[1] = (real) (mb * *m * nblcks + 5); + } + t[2] = (real) mb; + t[3] = (real) nb; + if (minw) { + work[1] = (real) lwmin; + } else { + work[1] = (real) lwreq; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQ", &i__1, (ftnlen)5); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (f2cmin(*m,*n) == 0) { + return 0; + } + +/* The LQ Decomposition */ + + if (*n <= *m || nb <= *m || nb >= *n) { + sgelqt_(m, n, &mb, &a[a_offset], lda, &t[6], &mb, &work[1], info); + } else { + slaswlq_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &mb, &work[1], + lwork, info); + } + + work[1] = (real) lwreq; + return 0; + +/* End of SGELQ */ + +} /* sgelq_ */ + diff --git a/lapack-netlib/SRC/sgelq2.c b/lapack-netlib/SRC/sgelq2.c new file mode 100644 index 000000000..d0ac5feb0 --- /dev/null +++ b/lapack-netlib/SRC/sgelq2.c @@ -0,0 +1,597 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGELQ2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELQ2 computes an LQ factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a n-by-n orthogonal matrix; */ +/* > L is an lower-triangular m-by-m matrix; */ +/* > 0 is a m-by-(n-m) zero matrix, if m < n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the m by f2cmin(m,n) lower trapezoidal matrix L (L is */ +/* > lower triangular if m <= n); the elements above the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *, ftnlen), slarfg_(integer *, real *, real *, + integer *, real *); + real aii; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQ2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + f2cmin(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGELQ2 */ + +} /* sgelq2_ */ + diff --git a/lapack-netlib/SRC/sgelqf.c b/lapack-netlib/SRC/sgelqf.c new file mode 100644 index 000000000..4488f3edc --- /dev/null +++ b/lapack-netlib/SRC/sgelqf.c @@ -0,0 +1,698 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGELQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGELQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELQF computes an LQ factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a N-by-N orthogonal matrix; */ +/* > L is an lower-triangular M-by-M matrix; */ +/* > 0 is a M-by-(N-M) zero matrix, if M < N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the m-by-f2cmin(m,n) lower trapezoidal matrix L (L is */ +/* > lower triangular if m <= n); the elements above the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M). */ +/* > For optimum performance LWORK >= M*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer + *, real *, real *, integer *); + integer ib, nb, nx; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*lwork < f2cmax(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the LQ factorization of the current block */ +/* A(i:i+ib-1,i:n) */ + + i__3 = *n - i__ + 1; + sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *n - i__ + 1; + slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGELQF */ + +} /* sgelqf_ */ + diff --git a/lapack-netlib/SRC/sgelqt.c b/lapack-netlib/SRC/sgelqt.c new file mode 100644 index 000000000..149cbc21a --- /dev/null +++ b/lapack-netlib/SRC/sgelqt.c @@ -0,0 +1,601 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGELQT */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) */ + +/* INTEGER INFO, LDA, LDT, M, N, MB */ +/* REAL A( LDA, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQT computes a blocked LQ factorization of a real M-by-N matrix A */ +/* > using the compact WY representation of Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MB */ +/* > \verbatim */ +/* > MB is INTEGER */ +/* > The block size to be used in the blocked QR. MIN(M,N) >= MB >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is */ +/* > lower triangular if M <= N); the elements above the diagonal */ +/* > are the rows of V. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,MIN(M,N)) */ +/* > The upper triangular block reflectors stored in compact form */ +/* > as a sequence of upper triangular blocks. See below */ +/* > for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= MB. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MB*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th row */ +/* > above the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 v1 v1 v1 v1 ) */ +/* > ( 1 v2 v2 v2 ) */ +/* > ( 1 v3 v3 ) */ +/* > */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. */ +/* > Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each */ +/* > block is of order MB except for the last block, which is of order */ +/* > IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block */ +/* > reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB */ +/* > for the last block) T's are stored in the MB-by-K matrix T as */ +/* > */ +/* > T = (T1 T2 ... TB). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgelqt_(integer *m, integer *n, integer *mb, real *a, + integer *lda, real *t, integer *ldt, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, k, iinfo, ib; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen), sgelqt3_( + integer *, integer *, real *, integer *, real *, integer *, + integer *); + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*mb < 1 || *mb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldt < *mb) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQT", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + return 0; + } + +/* Blocked loop of length K */ + + i__1 = k; + i__2 = *mb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,*mb); + +/* Compute the LQ factorization of the current block A(I:M,I:I+IB-1) */ + + i__3 = *n - i__ + 1; + sgelqt3_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + 1] + , ldt, &iinfo); + if (i__ + ib <= *m) { + +/* Update by applying H**T to A(I:M,I+IB:N) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + i__5 = *m - i__ - ib + 1; + slarfb_("R", "N", "F", "R", &i__3, &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + ib + + i__ * a_dim1], lda, &work[1], &i__5); + } + } + return 0; + +/* End of SGELQT */ + +} /* sgelqt_ */ + diff --git a/lapack-netlib/SRC/sgelqt3.c b/lapack-netlib/SRC/sgelqt3.c new file mode 100644 index 000000000..9601beac2 --- /dev/null +++ b/lapack-netlib/SRC/sgelqt3.c @@ -0,0 +1,657 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGELQT3 */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELQT3( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, M, N, LDT */ +/* REAL A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQT3 recursively computes a LQ factorization of a real M-by-N */ +/* > matrix A, using the compact WY representation of Q. */ +/* > */ +/* > Based on the algorithm of Elmroth and Gustavson, */ +/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M =< N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the real M-by-N matrix A. On exit, the elements on and */ +/* > below the diagonal contain the N-by-N lower triangular matrix L; the */ +/* > elements above the diagonal are the rows of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th row */ +/* > above the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 v1 v1 v1 v1 ) */ +/* > ( 1 v2 v2 v2 ) */ +/* > ( 1 v3 v3 v3 ) */ +/* > */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > where V**T is the transpose of V. */ +/* > */ +/* > For details of the algorithm, see Elmroth and Gustavson (cited above). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgelqt3_(integer *m, integer *n, real *a, integer *lda, + real *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + integer i1, j1, m1, m2; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *, ftnlen), slarfg_(integer *, real *, real *, integer *, real *); + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*m)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQT3", &i__1, (ftnlen)7); + return 0; + } + + if (*m == 1) { + +/* Compute Householder transform when N=1 */ + + slarfg_(n, &a[a_offset], &a[f2cmin(2,*n) * a_dim1 + 1], lda, &t[t_offset] + ); + + } else { + +/* Otherwise, split A into blocks... */ + + m1 = *m / 2; + m2 = *m - m1; +/* Computing MIN */ + i__1 = m1 + 1; + i1 = f2cmin(i__1,*m); +/* Computing MIN */ + i__1 = *m + 1; + j1 = f2cmin(i__1,*n); + +/* Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */ + + sgelqt3_(&m1, n, &a[a_offset], lda, &t[t_offset], ldt, &iinfo); + +/* Compute A(J1:M,1:N) = Q1^H A(J1:M,1:N) [workspace: T(1:N1,J1:N)] */ + + i__1 = m2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = m1; + for (j = 1; j <= i__2; ++j) { + t[i__ + m1 + j * t_dim1] = a[i__ + m1 + j * a_dim1]; + } + } + strmm_("R", "U", "T", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 + + t_dim1], ldt); + + i__1 = *n - m1; + sgemm_("N", "T", &m2, &m1, &i__1, &c_b7, &a[i1 + i1 * a_dim1], lda, & + a[i1 * a_dim1 + 1], lda, &c_b7, &t[i1 + t_dim1], ldt); + + strmm_("R", "U", "N", "N", &m2, &m1, &c_b7, &t[t_offset], ldt, &t[i1 + + t_dim1], ldt); + + i__1 = *n - m1; + sgemm_("N", "N", &m2, &i__1, &m1, &c_b19, &t[i1 + t_dim1], ldt, &a[i1 + * a_dim1 + 1], lda, &c_b7, &a[i1 + i1 * a_dim1], lda); + + strmm_("R", "U", "N", "U", &m2, &m1, &c_b7, &a[a_offset], lda, &t[i1 + + t_dim1], ldt); + + i__1 = m2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = m1; + for (j = 1; j <= i__2; ++j) { + a[i__ + m1 + j * a_dim1] -= t[i__ + m1 + j * t_dim1]; + t[i__ + m1 + j * t_dim1] = 0.f; + } + } + +/* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */ + + i__1 = *n - m1; + sgelqt3_(&m2, &i__1, &a[i1 + i1 * a_dim1], lda, &t[i1 + i1 * t_dim1], + ldt, &iinfo); + +/* Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 */ + + i__1 = m2; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = m1; + for (j = 1; j <= i__2; ++j) { + t[j + (i__ + m1) * t_dim1] = a[j + (i__ + m1) * a_dim1]; + } + } + + strmm_("R", "U", "T", "U", &m1, &m2, &c_b7, &a[i1 + i1 * a_dim1], lda, + &t[i1 * t_dim1 + 1], ldt); + + i__1 = *n - *m; + sgemm_("N", "T", &m1, &m2, &i__1, &c_b7, &a[j1 * a_dim1 + 1], lda, &a[ + i1 + j1 * a_dim1], lda, &c_b7, &t[i1 * t_dim1 + 1], ldt); + + strmm_("L", "U", "N", "N", &m1, &m2, &c_b19, &t[t_offset], ldt, &t[i1 + * t_dim1 + 1], ldt); + + strmm_("R", "U", "N", "N", &m1, &m2, &c_b7, &t[i1 + i1 * t_dim1], ldt, + &t[i1 * t_dim1 + 1], ldt); + + + +/* Y = (Y1,Y2); L = [ L1 0 ]; T = [T1 T3] */ +/* [ A(1:N1,J1:N) L2 ] [ 0 T2] */ + + } + + return 0; + +/* End of SGELQT3 */ + +} /* sgelqt3_ */ + diff --git a/lapack-netlib/SRC/sgels.c b/lapack-netlib/SRC/sgels.c new file mode 100644 index 000000000..bb53bfac0 --- /dev/null +++ b/lapack-netlib/SRC/sgels.c @@ -0,0 +1,954 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGELS solves overdetermined or underdetermined systems for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGELS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* REAL A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELS solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, or its transpose, using a QR or LQ */ +/* > factorization of A. It is assumed that A has full rank. */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an underdetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if M >= N, A is overwritten by details of its QR */ +/* > factorization as returned by SGEQRF; */ +/* > if M < N, A is overwritten by details of its LQ */ +/* > factorization as returned by SGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors; the residual sum of squares for the */ +/* > solution in each column is given by the sum of squares of */ +/* > elements N+1 to M in that column; */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors; the residual sum of squares */ +/* > for the solution in each column is given by the sum of */ +/* > squares of elements M+1 to N in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS ) ). */ +/* > For optimal performance, */ +/* > LWORK >= f2cmax( 1, MN + f2cmax( MN, NRHS )*NB ). */ +/* > where MN = f2cmin(M,N) and NB is the optimum block size. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgels_(char *trans, integer *m, integer *n, integer * + nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + real anrm, bnrm; + integer brow; + logical tpsd; + integer i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + integer wsize; + real rwork[1]; + integer nb; + extern /* Subroutine */ int slabad_(real *, real *); + integer mn; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer scllen; + real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *), sgeqrf_(integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), slaset_(char + *, integer *, integer *, real *, real *, real *, integer *); + real smlnum; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *), strtrs_(char *, char *, + char *, integer *, integer *, real *, integer *, real *, integer * + , integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs); + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -10; + } + } + } + +/* Figure out optimal block size */ + + if (*info == 0 || *info == -10) { + + tpsd = TRUE_; + if (lsame_(trans, "N")) { + tpsd = FALSE_; + } + + if (*m >= *n) { + nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "SORMQR", "LN", m, nrhs, n, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "SORMQR", "LT", m, nrhs, n, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } + } else { + nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + if (tpsd) { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "SORMLQ", "LT", n, nrhs, m, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__1, "SORMLQ", "LN", n, nrhs, m, & + c_n1, (ftnlen)6, (ftnlen)2); + nb = f2cmax(i__1,i__2); + } + } + +/* Computing MAX */ + i__1 = 1, i__2 = mn + f2cmax(mn,*nrhs) * nb; + wsize = f2cmax(i__1,i__2); + work[1] = (real) wsize; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELS ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + slaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + return 0; + } + +/* Get machine parameters */ + + smlnum = slamch_("S") / slamch_("P"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, rwork); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + slaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb); + goto L50; + } + + brow = *m; + if (tpsd) { + brow = *n; + } + bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, rwork); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* compute QR factorization of A */ + + i__1 = *lwork - mn; + sgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least N, optimally N*NB */ + + if (! tpsd) { + +/* Least-Squares Problem f2cmin || A * X - B || */ + +/* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + sormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + strtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *n; + + } else { + +/* Underdetermined system of equations A**T * X = B */ + +/* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + strtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + +/* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + sormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *m; + + } + + } else { + +/* Compute LQ factorization of A */ + + i__1 = *lwork - mn; + sgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info) + ; + +/* workspace at least M, optimally M*NB. */ + + if (! tpsd) { + +/* underdetermined system of equations A * X = B */ + +/* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + strtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset] + , lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(M+1:N,1:NRHS) = 0 */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */ + + i__1 = *lwork - mn; + sormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[ + 1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *n; + + } else { + +/* overdetermined system f2cmin || A**T * X - B || */ + +/* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ + + i__1 = *lwork - mn; + sormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, & + work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + +L50: + work[1] = (real) wsize; + + return 0; + +/* End of SGELS */ + +} /* sgels_ */ + diff --git a/lapack-netlib/SRC/sgelss.c b/lapack-netlib/SRC/sgelss.c new file mode 100644 index 000000000..6da9b3348 --- /dev/null +++ b/lapack-netlib/SRC/sgelss.c @@ -0,0 +1,1308 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGELSS solves overdetermined or underdetermined systems for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGELSS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */ +/* WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ +/* REAL RCOND */ +/* REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELSS computes the minimum norm solution to a real linear least */ +/* > squares problem: */ +/* > */ +/* > Minimize 2-norm(| b - A*x |). */ +/* > */ +/* > using the singular value decomposition (SVD) of A. A is an M-by-N */ +/* > matrix which may be rank-deficient. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */ +/* > X. */ +/* > */ +/* > The effective rank of A is determined by treating as zero those */ +/* > singular values which are less than RCOND times the largest singular */ +/* > value. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the first f2cmin(m,n) rows of A are overwritten with */ +/* > its right singular vectors, stored rowwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the M-by-NRHS right hand side matrix B. */ +/* > On exit, B is overwritten by the N-by-NRHS solution */ +/* > matrix X. If m >= n and RANK = n, the residual */ +/* > sum-of-squares for the solution in the i-th column is given */ +/* > by the sum of squares of elements n+1:m in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,f2cmax(M,N)). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is REAL array, dimension (f2cmin(M,N)) */ +/* > The singular values of A in decreasing order. */ +/* > The condition number of A in the 2-norm = S(1)/S(f2cmin(m,n)). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > RCOND is used to determine the effective rank of A. */ +/* > Singular values S(i) <= RCOND*S(1) are treated as zero. */ +/* > If RCOND < 0, machine precision is used instead. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The effective rank of A, i.e., the number of singular values */ +/* > which are greater than RCOND*S(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1, and also: */ +/* > LWORK >= 3*f2cmin(M,N) + f2cmax( 2*f2cmin(M,N), f2cmax(M,N), NRHS ) */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: the algorithm for computing the SVD failed to converge; */ +/* > if INFO = i, i off-diagonal elements of an intermediate */ +/* > bidiagonal form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgelss_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * + rank, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + real r__1; + + /* Local variables */ + real anrm, bnrm; + integer itau, lwork_sgebrd__, lwork_sgeqrf__, i__, lwork_sorgbr__, + lwork_sormbr__, lwork_sormlq__, iascl, ibscl, lwork_sormqr__, + chunk; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + real sfmin; + integer minmn, maxmn; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer itaup, itauq; + extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *); + integer mnthr, iwork; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + integer bl, ie, il; + extern /* Subroutine */ int slabad_(real *, real *); + integer mm, bdspac; + extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *, integer *); + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *), sgeqrf_(integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), slacpy_(char + *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sbdsqr_(char *, integer *, integer *, + integer *, integer *, real *, real *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), sorgbr_( + char *, integer *, integer *, integer *, real *, integer *, real * + , real *, integer *, integer *); + integer ldwork; + extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + integer minwrk, maxwrk; + real smlnum; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + real dum[1], eps, thr; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --s; + --work; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + maxmn = f2cmax(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,maxmn)) { + *info = -7; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (minmn > 0) { + mm = *m; + mnthr = ilaenv_(&c__6, "SGELSS", " ", m, n, nrhs, &c_n1, (ftnlen) + 6, (ftnlen)1); + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than */ +/* columns */ + +/* Compute space needed for SGEQRF */ + sgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info); + lwork_sgeqrf__ = dum[0]; +/* Compute space needed for SORMQR */ + sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, dum, &b[ + b_offset], ldb, dum, &c_n1, info); + lwork_sormqr__ = dum[0]; + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_sgeqrf__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_sormqr__; + maxwrk = f2cmax(i__1,i__2); + } + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + +/* Compute workspace needed for SBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 5; + bdspac = f2cmax(i__1,i__2); +/* Compute space needed for SGEBRD */ + sgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, + &c_n1, info); + lwork_sgebrd__ = dum[0]; +/* Compute space needed for SORMBR */ + sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, dum, & + b[b_offset], ldb, dum, &c_n1, info); + lwork_sormbr__ = dum[0]; +/* Compute space needed for SORGBR */ + sorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, + info); + lwork_sorgbr__ = dum[0]; +/* Compute total workspace needed */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_sgebrd__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_sormbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_sorgbr__; + maxwrk = f2cmax(i__1,i__2); + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = f2cmax(i__1, + i__2); + minwrk = f2cmax(i__1,bdspac); + maxwrk = f2cmax(minwrk,maxwrk); + } + if (*n > *m) { + +/* Compute workspace needed for SBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *m * 5; + bdspac = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = f2cmax(i__1, + i__2); + minwrk = f2cmax(i__1,bdspac); + if (*n >= mnthr) { + +/* Path 2a - underdetermined, with many more columns */ +/* than rows */ + +/* Compute space needed for SGEBRD */ + sgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, + dum, &c_n1, info); + lwork_sgebrd__ = dum[0]; +/* Compute space needed for SORMBR */ + sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, dum, + &b[b_offset], ldb, dum, &c_n1, info); + lwork_sormbr__ = dum[0]; +/* Compute space needed for SORGBR */ + sorgbr_("P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, + info); + lwork_sorgbr__ = dum[0]; +/* Compute space needed for SORMLQ */ + sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, dum, &b[ + b_offset], ldb, dum, &c_n1, info); + lwork_sormlq__ = dum[0]; +/* Compute total workspace needed */ + maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_sgebrd__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_sormbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_sorgbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + bdspac; + maxwrk = f2cmax(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = f2cmax(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + lwork_sormlq__; + maxwrk = f2cmax(i__1,i__2); + } else { + +/* Path 2 - underdetermined */ + +/* Compute space needed for SGEBRD */ + sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, + dum, &c_n1, info); + lwork_sgebrd__ = dum[0]; +/* Compute space needed for SORMBR */ + sormbr_("Q", "L", "T", m, nrhs, m, &a[a_offset], lda, dum, + &b[b_offset], ldb, dum, &c_n1, info); + lwork_sormbr__ = dum[0]; +/* Compute space needed for SORGBR */ + sorgbr_("P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, + info); + lwork_sorgbr__ = dum[0]; + maxwrk = *m * 3 + lwork_sgebrd__; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + lwork_sormbr__; + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + lwork_sorgbr__; + maxwrk = f2cmax(i__1,i__2); + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = f2cmax(i__1,i__2); + } + } + maxwrk = f2cmax(minwrk,maxwrk); + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELSS", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + eps = slamch_("P"); + sfmin = slamch_("S"); + smlnum = sfmin / eps; + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + slaset_("F", &i__1, nrhs, &c_b50, &c_b50, &b[b_offset], ldb); + slaset_("F", &minmn, &c__1, &c_b50, &c_b50, &s[1], &minmn); + *rank = 0; + goto L70; + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* Overdetermined case */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns */ + + mm = *n; + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, + info); + +/* Multiply B by transpose(Q) */ +/* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info); + +/* Zero out below R */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b50, &c_b50, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ + + i__1 = *lwork - iwork + 1; + sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of R */ +/* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[iwork], &i__1, info); + +/* Generate right bidiagonalizing vectors of R in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__1 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], & + i__1, info); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration */ +/* multiply B by transpose of left singular vectors */ +/* compute right singular vectors in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, + dum, &c__1, &b[b_offset], ldb, &work[iwork], info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + r__1 = *rcond * s[1]; + thr = f2cmax(r__1,sfmin); + if (*rcond < 0.f) { +/* Computing MAX */ + r__1 = eps * s[1]; + thr = f2cmax(r__1,sfmin); + } + *rank = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + slaset_("F", &c__1, nrhs, &c_b50, &c_b50, &b[i__ + b_dim1], + ldb); + } +/* L10: */ + } + +/* Multiply B by right singular vectors */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + sgemm_("T", "N", n, nrhs, n, &c_b83, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b50, &work[1], ldb); + slacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb) + ; + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = f2cmin(i__3,chunk); + sgemm_("T", "N", n, &bl, n, &c_b83, &a[a_offset], lda, &b[i__ + * b_dim1 + 1], ldb, &c_b50, &work[1], n); + slacpy_("G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb); +/* L20: */ + } + } else { + sgemv_("T", n, n, &c_b83, &a[a_offset], lda, &b[b_offset], &c__1, + &c_b50, &work[1], &c__1); + scopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = *m, i__1 = (*m << 1) - 4, i__2 = f2cmax(i__2,i__1), i__2 = f2cmax( + i__2,*nrhs), i__1 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + f2cmax(i__2,i__1)) { + +/* Path 2a - underdetermined, with many more columns than rows */ +/* and sufficient workspace for an efficient algorithm */ + + ldwork = *m; +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = + f2cmax(i__3,*nrhs), i__4 = *n - *m * 3; + i__2 = (*m << 2) + *m * *lda + f2cmax(i__3,i__4), i__1 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= f2cmax(i__2,i__1)) { + ldwork = *lda; + } + itau = 1; + iwork = *m + 1; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, + info); + il = iwork; + +/* Copy L to WORK(IL), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b50, &c_b50, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[iwork], &i__2, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of L */ +/* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[iwork], &i__2, info); + +/* Generate right bidiagonalizing vectors of R in WORK(IL) */ +/* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[ + iwork], &i__2, info); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of L in WORK(IL) and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need M*M+M+BDSPAC) */ + + sbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], & + ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork] + , info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + r__1 = *rcond * s[1]; + thr = f2cmax(r__1,sfmin); + if (*rcond < 0.f) { +/* Computing MAX */ + r__1 = eps * s[1]; + thr = f2cmax(r__1,sfmin); + } + *rank = 0; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + if (s[i__] > thr) { + srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + slaset_("F", &c__1, nrhs, &c_b50, &c_b50, &b[i__ + b_dim1] + , ldb); + } +/* L30: */ + } + iwork = ie; + +/* Multiply B by right singular vectors of L in WORK(IL) */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */ + + if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) { + sgemm_("T", "N", m, nrhs, m, &c_b83, &work[il], &ldwork, &b[ + b_offset], ldb, &c_b50, &work[iwork], ldb); + slacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb); + } else if (*nrhs > 1) { + chunk = (*lwork - iwork + 1) / *m; + i__2 = *nrhs; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = f2cmin(i__3,chunk); + sgemm_("T", "N", m, &bl, m, &c_b83, &work[il], &ldwork, & + b[i__ * b_dim1 + 1], ldb, &c_b50, &work[iwork], m); + slacpy_("G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1] + , ldb); +/* L40: */ + } + } else { + sgemv_("T", m, m, &c_b83, &work[il], &ldwork, &b[b_dim1 + 1], + &c__1, &c_b50, &work[iwork], &c__1); + scopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1); + } + +/* Zero out below first M rows of B */ + + i__1 = *n - *m; + slaset_("F", &i__1, nrhs, &c_b50, &c_b50, &b[*m + 1 + b_dim1], + ldb); + iwork = itau + *m; + +/* Multiply transpose(Q) by B */ +/* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__1 = *lwork - iwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors */ +/* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[iwork], &i__1, info); + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__1 = *lwork - iwork + 1; + sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__1, info); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of A in A and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], + lda, dum, &c__1, &b[b_offset], ldb, &work[iwork], info); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + r__1 = *rcond * s[1]; + thr = f2cmax(r__1,sfmin); + if (*rcond < 0.f) { +/* Computing MAX */ + r__1 = eps * s[1]; + thr = f2cmax(r__1,sfmin); + } + *rank = 0; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + srscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + slaset_("F", &c__1, nrhs, &c_b50, &c_b50, &b[i__ + b_dim1] + , ldb); + } +/* L50: */ + } + +/* Multiply B by right singular vectors of A */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + sgemm_("T", "N", n, nrhs, m, &c_b83, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b50, &work[1], ldb); + slacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb); + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = f2cmin(i__3,chunk); + sgemm_("T", "N", n, &bl, m, &c_b83, &a[a_offset], lda, &b[ + i__ * b_dim1 + 1], ldb, &c_b50, &work[1], n); + slacpy_("F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], + ldb); +/* L60: */ + } + } else { + sgemv_("T", m, n, &c_b83, &a[a_offset], lda, &b[b_offset], & + c__1, &c_b50, &work[1], &c__1); + scopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + } + } + +/* Undo scaling */ + + if (iascl == 1) { + slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L70: + work[1] = (real) maxwrk; + return 0; + +/* End of SGELSS */ + +} /* sgelss_ */ + diff --git a/lapack-netlib/SRC/sgelsy.c b/lapack-netlib/SRC/sgelsy.c new file mode 100644 index 000000000..def845983 --- /dev/null +++ b/lapack-netlib/SRC/sgelsy.c @@ -0,0 +1,939 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGELSY solves overdetermined or underdetermined systems for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGELSY + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, */ +/* WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ +/* REAL RCOND */ +/* INTEGER JPVT( * ) */ +/* REAL A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGELSY computes the minimum-norm solution to a real linear least */ +/* > squares problem: */ +/* > minimize || A * X - B || */ +/* > using a complete orthogonal factorization of A. A is an M-by-N */ +/* > matrix which may be rank-deficient. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > */ +/* > The routine first computes a QR factorization with column pivoting: */ +/* > A * P = Q * [ R11 R12 ] */ +/* > [ 0 R22 ] */ +/* > with R11 defined as the largest leading submatrix whose estimated */ +/* > condition number is less than 1/RCOND. The order of R11, RANK, */ +/* > is the effective rank of A. */ +/* > */ +/* > Then, R22 is considered to be negligible, and R12 is annihilated */ +/* > by orthogonal transformations from the right, arriving at the */ +/* > complete orthogonal factorization: */ +/* > A * P = Q * [ T11 0 ] * Z */ +/* > [ 0 0 ] */ +/* > The minimum-norm solution is then */ +/* > X = P * Z**T [ inv(T11)*Q1**T*B ] */ +/* > [ 0 ] */ +/* > where Q1 consists of the first RANK columns of Q. */ +/* > */ +/* > This routine is basically identical to the original xGELSX except */ +/* > three differences: */ +/* > o The call to the subroutine xGEQPF has been substituted by the */ +/* > the call to the subroutine xGEQP3. This subroutine is a Blas-3 */ +/* > version of the QR factorization with column pivoting. */ +/* > o Matrix B (the right hand side) is updated with Blas-3. */ +/* > o The permutation of matrix B (the right hand side) is faster and */ +/* > more simple. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A has been overwritten by details of its */ +/* > complete orthogonal factorization. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the M-by-NRHS right hand side matrix B. */ +/* > On exit, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] JPVT */ +/* > \verbatim */ +/* > JPVT is INTEGER array, dimension (N) */ +/* > On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ +/* > to the front of AP, otherwise column i is a free column. */ +/* > On exit, if JPVT(i) = k, then the i-th column of AP */ +/* > was the k-th column of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > RCOND is used to determine the effective rank of A, which */ +/* > is defined as the order of the largest leading triangular */ +/* > submatrix R11 in the QR factorization with pivoting of A, */ +/* > whose estimated condition number < 1/RCOND. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The effective rank of A, i.e., the order of the submatrix */ +/* > R11. This is the same as the order of the submatrix T11 */ +/* > in the complete orthogonal factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > The unblocked strategy requires that: */ +/* > LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */ +/* > where MN = f2cmin( M, N ). */ +/* > The block algorithm requires that: */ +/* > LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */ +/* > where NB is an upper bound on the blocksize returned */ +/* > by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, */ +/* > and SORMRZ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n */ +/* > E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */ +/* > G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgelsy_(integer *m, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, + integer *rank, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + real anrm, bnrm, smin, smax; + integer i__, j, iascl, ibscl, ismin, ismax; + real c1, c2; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + real wsize, s1, s2; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), slaic1_(integer *, integer *, + real *, real *, real *, real *, real *, real *, real *), sgeqp3_( + integer *, integer *, real *, integer *, integer *, real *, real * + , integer *, integer *); + integer nb; + extern /* Subroutine */ int slabad_(real *, real *); + integer mn; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + integer lwkmin, nb1, nb2, nb3, nb4; + real sminpr, smaxpr, smlnum; + integer lwkopt; + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *), sormrz_(char *, char *, + integer *, integer *, integer *, integer *, real *, integer *, + real *, real *, integer *, real *, integer *, integer *), stzrzf_(integer *, integer *, real *, integer *, real *, + real *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --jpvt; + --work; + + /* Function Body */ + mn = f2cmin(*m,*n); + ismin = mn + 1; + ismax = (mn << 1) + 1; + +/* Test the input arguments. */ + + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -7; + } + } + +/* Figure out optimal block size */ + + if (*info == 0) { + if (mn == 0 || *nrhs == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "SORMQR", " ", m, n, nrhs, &c_n1, (ftnlen)6, + (ftnlen)1); + nb4 = ilaenv_(&c__1, "SORMRQ", " ", m, n, nrhs, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); +/* Computing MAX */ + i__1 = mn << 1, i__2 = *n + 1, i__1 = f2cmax(i__1,i__2), i__2 = mn + + *nrhs; + lwkmin = mn + f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = f2cmax( + i__1,i__2), i__2 = (mn << 1) + nb * *nrhs; + lwkopt = f2cmax(i__1,i__2); + } + work[1] = (real) lwkopt; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELSY", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (mn == 0 || *nrhs == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + smlnum = slamch_("S") / slamch_("P"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax entries outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = f2cmax(*m,*n); + slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); + *rank = 0; + goto L70; + } + + bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* Compute QR factorization with column pivoting of A: */ +/* A * P = Q * R */ + + i__1 = *lwork - mn; + sgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1, + info); + wsize = mn + work[mn + 1]; + +/* workspace: MN+2*N+NB*(N+1). */ +/* Details of Householder rotations stored in WORK(1:MN). */ + +/* Determine RANK using incremental condition estimation */ + + work[ismin] = 1.f; + work[ismax] = 1.f; + smax = (r__1 = a[a_dim1 + 1], abs(r__1)); + smin = smax; + if ((r__1 = a[a_dim1 + 1], abs(r__1)) == 0.f) { + *rank = 0; + i__1 = f2cmax(*m,*n); + slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); + goto L70; + } else { + *rank = 1; + } + +L10: + if (*rank < mn) { + i__ = *rank + 1; + slaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &sminpr, &s1, &c1); + slaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[ + i__ + i__ * a_dim1], &smaxpr, &s2, &c2); + + if (smaxpr * *rcond <= sminpr) { + i__1 = *rank; + for (i__ = 1; i__ <= i__1; ++i__) { + work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; + work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; +/* L20: */ + } + work[ismin + *rank] = c1; + work[ismax + *rank] = c2; + smin = sminpr; + smax = smaxpr; + ++(*rank); + goto L10; + } + } + +/* workspace: 3*MN. */ + +/* Logically partition R = [ R11 R12 ] */ +/* [ 0 R22 ] */ +/* where R11 = R(1:RANK,1:RANK) */ + +/* [R11,R12] = [ T11, 0 ] * Y */ + + if (*rank < *n) { + i__1 = *lwork - (mn << 1); + stzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + + 1], &i__1, info); + } + +/* workspace: 2*MN. */ +/* Details of Householder rotations stored in WORK(MN+1:2*MN) */ + +/* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */ + + i__1 = *lwork - (mn << 1); + sormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & + b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info); +/* Computing MAX */ + r__1 = wsize, r__2 = (mn << 1) + work[(mn << 1) + 1]; + wsize = f2cmax(r__1,r__2); + +/* workspace: 2*MN+NB*NRHS. */ + +/* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, & + a[a_offset], lda, &b[b_offset], ldb); + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *rank + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) */ + + if (*rank < *n) { + i__1 = *n - *rank; + i__2 = *lwork - (mn << 1); + sormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, + &work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2, + info); + } + +/* workspace: 2*MN+NRHS. */ + +/* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[jpvt[i__]] = b[i__ + j * b_dim1]; +/* L50: */ + } + scopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1); +/* L60: */ + } + +/* workspace: N. */ + +/* Undo scaling */ + + if (iascl == 1) { + slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], + lda, info); + } else if (iascl == 2) { + slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + slascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], + lda, info); + } + if (ibscl == 1) { + slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L70: + work[1] = (real) lwkopt; + + return 0; + +/* End of SGELSY */ + +} /* sgelsy_ */ + diff --git a/lapack-netlib/SRC/sgemlq.c b/lapack-netlib/SRC/sgemlq.c new file mode 100644 index 000000000..6b6b1853b --- /dev/null +++ b/lapack-netlib/SRC/sgemlq.c @@ -0,0 +1,681 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEMLQ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, */ +/* $ TSIZE, C, LDC, WORK, LWORK, INFO ) */ + + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */ +/* REAL A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEMLQ overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > where Q is a real orthogonal matrix defined as the product */ +/* > of blocked elementary reflectors computed by short wide LQ */ +/* > factorization (SGELQ) */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension */ +/* > (LDA,M) if SIDE = 'L', */ +/* > (LDA,N) if SIDE = 'R' */ +/* > Part of the data structure to represent Q as returned by DGELQ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (MAX(5,TSIZE)). */ +/* > Part of the data structure to represent Q as returned by SGELQ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > The dimension of the array T. TSIZE >= 5. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) REAL array, dimension (MAX(1,LWORK)) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1, then a workspace query is assumed. The routine */ +/* > only calculates the size of the WORK array, returns this */ +/* > value as WORK(1), and no error message related to WORK */ +/* > is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > SLASWLQ or SGELQT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, SGELQ will use either */ +/* > SLASWLQ (if the matrix is wide-and-short) or SGELQT to compute */ +/* > the LQ factorization. */ +/* > This version of SGEMLQ will use either SLAMSWLQ or SGEMLQT to */ +/* > multiply matrix Q by another matrix. */ +/* > Further Details in SLAMSWLQ or SGEMLQT. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgemlq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *t, integer *tsize, real *c__, + integer *ldc, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1; + + /* Local variables */ + logical left, tran; + extern /* Subroutine */ int slamswlq_(char *, char *, integer *, integer * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *); + extern logical lsame_(char *, char *); + logical right; + integer mb, nb, mn, lw, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran, lquery; + extern /* Subroutine */ int sgemlqt_(char *, char *, integer *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real + *, integer *, real *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + lquery = *lwork == -1; + notran = lsame_(trans, "N"); + tran = lsame_(trans, "T"); + left = lsame_(side, "L"); + right = lsame_(side, "R"); + + mb = (integer) t[2]; + nb = (integer) t[3]; + if (left) { + lw = *n * mb; + mn = *m; + } else { + lw = *m * mb; + mn = *n; + } + + if (nb > *k && mn > *k) { + if ((mn - *k) % (nb - *k) == 0) { + nblcks = (mn - *k) / (nb - *k); + } else { + nblcks = (mn - *k) / (nb - *k) + 1; + } + } else { + nblcks = 1; + } + + *info = 0; + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > mn) { + *info = -5; + } else if (*lda < f2cmax(1,*k)) { + *info = -7; + } else if (*tsize < 5) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } else if (*lwork < f2cmax(1,lw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + work[1] = (real) lw; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEMLQ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*k) == 0) { + return 0; + } + +/* Computing MAX */ + i__1 = f2cmax(*m,*n); + if (left && *m <= *k || right && *n <= *k || nb <= *k || nb >= f2cmax(i__1,* + k)) { + sgemlqt_(side, trans, m, n, k, &mb, &a[a_offset], lda, &t[6], &mb, & + c__[c_offset], ldc, &work[1], info); + } else { + slamswlq_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], & + mb, &c__[c_offset], ldc, &work[1], lwork, info); + } + + work[1] = (real) lw; + + return 0; + +/* End of SGEMLQ */ + +} /* sgemlq_ */ + diff --git a/lapack-netlib/SRC/sgemlqt.c b/lapack-netlib/SRC/sgemlqt.c new file mode 100644 index 000000000..6b2ae2945 --- /dev/null +++ b/lapack-netlib/SRC/sgemlqt.c @@ -0,0 +1,686 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEMLQT */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, */ +/* C, LDC, WORK, INFO ) */ + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDV, LDC, M, N, MB, LDT */ +/* REAL V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEMLQT overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q C C Q */ +/* > TRANS = 'T': Q**T C C Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of K */ +/* > elementary reflectors: */ +/* > */ +/* > Q = H(1) H(2) . . . H(K) = I - V T V**T */ +/* > */ +/* > generated using the compact WY representation as returned by DGELQT. */ +/* > */ +/* > Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'C': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MB */ +/* > \verbatim */ +/* > MB is INTEGER */ +/* > The block size used for the storage of T. K >= MB >= 1. */ +/* > This must be the same value of MB used to generate T */ +/* > in DGELQT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is REAL array, dimension */ +/* > (LDV,M) if SIDE = 'L', */ +/* > (LDV,N) if SIDE = 'R' */ +/* > The i-th row must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGELQT in the first K rows of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,K) */ +/* > The upper triangular factors of the block reflectors */ +/* > as returned by DGELQT, stored as a MB-by-K matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= MB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array. The dimension of */ +/* > WORK is N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgemlqt_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *mb, real *v, integer *ldv, real *t, integer * + ldt, real *c__, integer *ldc, real *work, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, + i__3, i__4; + + /* Local variables */ + logical left, tran; + integer i__; + extern logical lsame_(char *, char *); + logical right; + integer ib, kf; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen); + logical notran; + integer ldwork; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + right = lsame_(side, "R"); + tran = lsame_(trans, "T"); + notran = lsame_(trans, "N"); + + if (left) { + ldwork = f2cmax(1,*n); + } else if (right) { + ldwork = f2cmax(1,*m); + } + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0) { + *info = -5; + } else if (*mb < 1 || *mb > *k && *k > 0) { + *info = -6; + } else if (*ldv < f2cmax(1,*k)) { + *info = -8; + } else if (*ldt < *mb) { + *info = -10; + } else if (*ldc < f2cmax(1,*m)) { + *info = -12; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEMLQT", &i__1, (ftnlen)7); + return 0; + } + + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran) { + + i__1 = *k; + i__2 = *mb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *mb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *m - i__ + 1; + slarfb_("L", "T", "F", "R", &i__3, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && tran) { + + i__2 = *k; + i__1 = *mb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = *mb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *n - i__ + 1; + slarfb_("R", "N", "F", "R", m, &i__3, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } else if (left && tran) { + + kf = (*k - 1) / *mb * *mb + 1; + i__1 = -(*mb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *mb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *m - i__ + 1; + slarfb_("L", "N", "F", "R", &i__2, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && notran) { + + kf = (*k - 1) / *mb * *mb + 1; + i__1 = -(*mb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *mb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *n - i__ + 1; + slarfb_("R", "T", "F", "R", m, &i__2, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } + + return 0; + +/* End of SGEMLQT */ + +} /* sgemlqt_ */ + diff --git a/lapack-netlib/SRC/sgemqr.c b/lapack-netlib/SRC/sgemqr.c new file mode 100644 index 000000000..82248d4ec --- /dev/null +++ b/lapack-netlib/SRC/sgemqr.c @@ -0,0 +1,683 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEMQR */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, */ +/* $ TSIZE, C, LDC, WORK, LWORK, INFO ) */ + + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC */ +/* REAL A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) */ + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEMQR overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product */ +/* > of blocked elementary reflectors computed by tall skinny */ +/* > QR factorization (SGEQR) */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,K) */ +/* > Part of the data structure to represent Q as returned by SGEQR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= f2cmax(1,M); */ +/* > if SIDE = 'R', LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (MAX(5,TSIZE)). */ +/* > Part of the data structure to represent Q as returned by SGEQR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > The dimension of the array T. TSIZE >= 5. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) REAL array, dimension (MAX(1,LWORK)) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1, then a workspace query is assumed. The routine */ +/* > only calculates the size of the WORK array, returns this */ +/* > value as WORK(1), and no error message related to WORK */ +/* > is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > SLATSQR or SGEQRT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, SGEQR will use either */ +/* > SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute */ +/* > the QR factorization. */ +/* > This version of SGEMQR will use either SLAMTSQR or SGEMQRT to */ +/* > multiply matrix Q by another matrix. */ +/* > Further Details in SLAMTSQR or SGEMQRT. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgemqr_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *t, integer *tsize, real *c__, + integer *ldc, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1; + + /* Local variables */ + logical left, tran; + extern /* Subroutine */ int slamtsqr_(char *, char *, integer *, integer * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *); + extern logical lsame_(char *, char *); + logical right; + integer mb, nb, mn, lw, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran, lquery; + extern /* Subroutine */ int sgemqrt_(char *, char *, integer *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real + *, integer *, real *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + lquery = *lwork == -1; + notran = lsame_(trans, "N"); + tran = lsame_(trans, "T"); + left = lsame_(side, "L"); + right = lsame_(side, "R"); + + mb = (integer) t[2]; + nb = (integer) t[3]; + if (left) { + lw = *n * nb; + mn = *m; + } else { + lw = mb * nb; + mn = *n; + } + + if (mb > *k && mn > *k) { + if ((mn - *k) % (mb - *k) == 0) { + nblcks = (mn - *k) / (mb - *k); + } else { + nblcks = (mn - *k) / (mb - *k) + 1; + } + } else { + nblcks = 1; + } + + *info = 0; + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > mn) { + *info = -5; + } else if (*lda < f2cmax(1,mn)) { + *info = -7; + } else if (*tsize < 5) { + *info = -9; + } else if (*ldc < f2cmax(1,*m)) { + *info = -11; + } else if (*lwork < f2cmax(1,lw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + work[1] = (real) lw; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEMQR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*k) == 0) { + return 0; + } + +/* Computing MAX */ + i__1 = f2cmax(*m,*n); + if (left && *m <= *k || right && *n <= *k || mb <= *k || mb >= f2cmax(i__1,* + k)) { + sgemqrt_(side, trans, m, n, k, &nb, &a[a_offset], lda, &t[6], &nb, & + c__[c_offset], ldc, &work[1], info); + } else { + slamtsqr_(side, trans, m, n, k, &mb, &nb, &a[a_offset], lda, &t[6], & + nb, &c__[c_offset], ldc, &work[1], lwork, info); + } + + work[1] = (real) lw; + + return 0; + +/* End of SGEMQR */ + +} /* sgemqr_ */ + diff --git a/lapack-netlib/SRC/sgemqrt.c b/lapack-netlib/SRC/sgemqrt.c new file mode 100644 index 000000000..42d3ce051 --- /dev/null +++ b/lapack-netlib/SRC/sgemqrt.c @@ -0,0 +1,706 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEMQRT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEMQRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, */ +/* C, LDC, WORK, INFO ) */ + +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDV, LDC, M, N, NB, LDT */ +/* REAL V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEMQRT overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q C C Q */ +/* > TRANS = 'T': Q**T C C Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of K */ +/* > elementary reflectors: */ +/* > */ +/* > Q = H(1) H(2) . . . H(K) = I - V T V**T */ +/* > */ +/* > generated using the compact WY representation as returned by SGEQRT. */ +/* > */ +/* > Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER */ +/* > The block size used for the storage of T. K >= NB >= 1. */ +/* > This must be the same value of NB used to generate T */ +/* > in CGEQRT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > CGEQRT in the first K columns of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If SIDE = 'L', LDA >= f2cmax(1,M); */ +/* > if SIDE = 'R', LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,K) */ +/* > The upper triangular factors of the block reflectors */ +/* > as returned by CGEQRT, stored as a NB-by-N matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= NB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array. The dimension of WORK is */ +/* > N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgemqrt_(char *side, char *trans, integer *m, integer *n, + integer *k, integer *nb, real *v, integer *ldv, real *t, integer * + ldt, real *c__, integer *ldc, real *work, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, c_dim1, c_offset, t_dim1, t_offset, i__1, i__2, + i__3, i__4; + + /* Local variables */ + logical left, tran; + integer i__, q; + extern logical lsame_(char *, char *); + logical right; + integer ib, kf; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen); + logical notran; + integer ldwork; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1 * 1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + right = lsame_(side, "R"); + tran = lsame_(trans, "T"); + notran = lsame_(trans, "N"); + + if (left) { + ldwork = f2cmax(1,*n); + q = *m; + } else if (right) { + ldwork = f2cmax(1,*m); + q = *n; + } + if (! left && ! right) { + *info = -1; + } else if (! tran && ! notran) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > q) { + *info = -5; + } else if (*nb < 1 || *nb > *k && *k > 0) { + *info = -6; + } else if (*ldv < f2cmax(1,q)) { + *info = -8; + } else if (*ldt < *nb) { + *info = -10; + } else if (*ldc < f2cmax(1,*m)) { + *info = -12; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEMQRT", &i__1, (ftnlen)7); + return 0; + } + + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && tran) { + + i__1 = *k; + i__2 = *nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *nb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *m - i__ + 1; + slarfb_("L", "T", "F", "C", &i__3, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && notran) { + + i__2 = *k; + i__1 = *nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = *nb, i__4 = *k - i__ + 1; + ib = f2cmin(i__3,i__4); + i__3 = *n - i__ + 1; + slarfb_("R", "N", "F", "C", m, &i__3, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } else if (left && notran) { + + kf = (*k - 1) / *nb * *nb + 1; + i__1 = -(*nb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *nb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *m - i__ + 1; + slarfb_("L", "N", "F", "C", &i__2, n, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ + c_dim1], ldc, + &work[1], &ldwork); + } + + } else if (right && tran) { + + kf = (*k - 1) / *nb * *nb + 1; + i__1 = -(*nb); + for (i__ = kf; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = *nb, i__3 = *k - i__ + 1; + ib = f2cmin(i__2,i__3); + i__2 = *n - i__ + 1; + slarfb_("R", "T", "F", "C", m, &i__2, &ib, &v[i__ + i__ * v_dim1], + ldv, &t[i__ * t_dim1 + 1], ldt, &c__[i__ * c_dim1 + 1], + ldc, &work[1], &ldwork); + } + + } + + return 0; + +/* End of SGEMQRT */ + +} /* sgemqrt_ */ + diff --git a/lapack-netlib/SRC/sgeql2.c b/lapack-netlib/SRC/sgeql2.c new file mode 100644 index 000000000..e785c39ea --- /dev/null +++ b/lapack-netlib/SRC/sgeql2.c @@ -0,0 +1,591 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQL2 computes a QL factorization of a real m by n matrix A: */ +/* > A = Q * L. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, if m >= n, the lower triangle of the subarray */ +/* > A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; */ +/* > if m <= n, the elements on and below the (n-m)-th */ +/* > superdiagonal contain the m by n lower trapezoidal matrix L; */ +/* > the remaining elements, with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of elementary reflectors */ +/* > (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ +/* > A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeql2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *, ftnlen), slarfg_(integer *, real *, real *, + integer *, real *); + real aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQL2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + for (i__ = k; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(1:m-k+i-1,n-k+i) */ + + i__1 = *m - k + i__; + slarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[(*n - k + + i__) * a_dim1 + 1], &c__1, &tau[i__]); + +/* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left */ + + aii = a[*m - k + i__ + (*n - k + i__) * a_dim1]; + a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.f; + i__1 = *m - k + i__; + i__2 = *n - k + i__ - 1; + slarf_("Left", &i__1, &i__2, &a[(*n - k + i__) * a_dim1 + 1], &c__1, & + tau[i__], &a[a_offset], lda, &work[1]); + a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SGEQL2 */ + +} /* sgeql2_ */ + diff --git a/lapack-netlib/SRC/sgeqlf.c b/lapack-netlib/SRC/sgeqlf.c new file mode 100644 index 000000000..c792696c3 --- /dev/null +++ b/lapack-netlib/SRC/sgeqlf.c @@ -0,0 +1,709 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQLF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQLF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQLF computes a QL factorization of a real M-by-N matrix A: */ +/* > A = Q * L. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if m >= n, the lower triangle of the subarray */ +/* > A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; */ +/* > if m <= n, the elements on and below the (n-m)-th */ +/* > superdiagonal contain the M-by-N lower trapezoidal matrix L; */ +/* > the remaining elements, with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of elementary reflectors */ +/* > (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in */ +/* > A(1:m-k+i-1,n-k+i), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqlf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int sgeql2_(integer *, integer *, real *, integer + *, real *, real *, integer *); + integer ib, nb, ki, kk, mu, nu, nx; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + + if (*info == 0) { + k = f2cmin(*m,*n); + if (k == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "SGEQLF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + } + work[1] = (real) lwkopt; + + if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQLF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + + nbmin = 2; + nx = 1; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQLF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQLF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially. */ +/* The last kk columns are handled by the block method. */ + + ki = (k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = k, i__2 = ki + nb; + kk = f2cmin(i__1,i__2); + + i__1 = k - kk + 1; + i__2 = -nb; + for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ + += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the QL factorization of the current block */ +/* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) */ + + i__3 = *m - k + i__ + ib - 1; + sgeql2_(&i__3, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, &tau[ + i__], &work[1], &iinfo); + if (*n - k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *m - k + i__ + ib - 1; + slarft_("Backward", "Columnwise", &i__3, &ib, &a[(*n - k + + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ + + i__3 = *m - k + i__ + ib - 1; + i__4 = *n - k + i__ - 1; + slarfb_("Left", "Transpose", "Backward", "Columnwise", &i__3, + &i__4, &ib, &a[(*n - k + i__) * a_dim1 + 1], lda, & + work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], & + ldwork); + } +/* L10: */ + } + mu = *m - k + i__ + nb - 1; + nu = *n - k + i__ + nb - 1; + } else { + mu = *m; + nu = *n; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0 && nu > 0) { + sgeql2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGEQLF */ + +} /* sgeqlf_ */ + diff --git a/lapack-netlib/SRC/sgeqp3.c b/lapack-netlib/SRC/sgeqp3.c new file mode 100644 index 000000000..d397dba70 --- /dev/null +++ b/lapack-netlib/SRC/sgeqp3.c @@ -0,0 +1,789 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQP3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQP3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* INTEGER JPVT( * ) */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQP3 computes a QR factorization with column pivoting of a */ +/* > matrix A: A*P = Q*R using Level 3 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the upper triangle of the array contains the */ +/* > f2cmin(M,N)-by-N upper trapezoidal matrix R; the elements below */ +/* > the diagonal, together with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of f2cmin(M,N) elementary */ +/* > reflectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] JPVT */ +/* > \verbatim */ +/* > JPVT is INTEGER array, dimension (N) */ +/* > On entry, if JPVT(J).ne.0, the J-th column of A is permuted */ +/* > to the front of A*P (a leading column); if JPVT(J)=0, */ +/* > the J-th column of A is a free column. */ +/* > On exit, if JPVT(J)=K, then the J-th column of A*P was the */ +/* > the K-th column of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 3*N+1. */ +/* > For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB */ +/* > is the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real/complex vector */ +/* > with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in */ +/* > A(i+1:m,i), and tau in TAU(i). */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ +/* > X. Sun, Computer Science Dept., Duke University, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqp3_(integer *m, integer *n, real *a, integer *lda, + integer *jpvt, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer nfxd; + extern real snrm2_(integer *, real *, integer *); + integer j, nbmin, minmn, minws; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slaqp2_(integer *, integer *, integer *, real *, + integer *, integer *, real *, real *, real *, real *); + integer jb, na, nb, sm, sn, nx; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + integer topbmn, sminmn; + extern /* Subroutine */ int slaqps_(integer *, integer *, integer *, + integer *, integer *, real *, integer *, integer *, real *, real * + , real *, real *, real *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + integer fjb, iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + +/* Test input arguments */ +/* ==================== */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --jpvt; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + + if (*info == 0) { + minmn = f2cmin(*m,*n); + if (minmn == 0) { + iws = 1; + lwkopt = 1; + } else { + iws = *n * 3 + 1; + nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = (*n << 1) + (*n + 1) * nb; + } + work[1] = (real) lwkopt; + + if (*lwork < iws && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQP3", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Move initial columns up front. */ + + nfxd = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (jpvt[j] != 0) { + if (j != nfxd) { + sswap_(m, &a[j * a_dim1 + 1], &c__1, &a[nfxd * a_dim1 + 1], & + c__1); + jpvt[j] = jpvt[nfxd]; + jpvt[nfxd] = j; + } else { + jpvt[j] = j; + } + ++nfxd; + } else { + jpvt[j] = j; + } +/* L10: */ + } + --nfxd; + +/* Factorize fixed columns */ +/* ======================= */ + +/* Compute the QR factorization of fixed columns and update */ +/* remaining columns. */ + + if (nfxd > 0) { + na = f2cmin(*m,nfxd); +/* CC CALL SGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) */ + sgeqrf_(m, &na, &a[a_offset], lda, &tau[1], &work[1], lwork, info); +/* Computing MAX */ + i__1 = iws, i__2 = (integer) work[1]; + iws = f2cmax(i__1,i__2); + if (na < *n) { +/* CC CALL SORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, */ +/* CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) */ + i__1 = *n - na; + sormqr_("Left", "Transpose", m, &i__1, &na, &a[a_offset], lda, & + tau[1], &a[(na + 1) * a_dim1 + 1], lda, &work[1], lwork, + info); +/* Computing MAX */ + i__1 = iws, i__2 = (integer) work[1]; + iws = f2cmax(i__1,i__2); + } + } + +/* Factorize free columns */ +/* ====================== */ + + if (nfxd < minmn) { + + sm = *m - nfxd; + sn = *n - nfxd; + sminmn = minmn - nfxd; + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "SGEQRF", " ", &sm, &sn, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nbmin = 2; + nx = 0; + + if (nb > 1 && nb < sminmn) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", &sm, &sn, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + + + if (nx < sminmn) { + +/* Determine if workspace is large enough for blocked code. */ + + minws = (sn << 1) + (sn + 1) * nb; + iws = f2cmax(iws,minws); + if (*lwork < minws) { + +/* Not enough workspace to use optimal NB: Reduce NB and */ +/* determine the minimum value of NB. */ + + nb = (*lwork - (sn << 1)) / (sn + 1); +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", &sm, &sn, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + + + } + } + } + +/* Initialize partial column norms. The first N elements of work */ +/* store the exact column norms. */ + + i__1 = *n; + for (j = nfxd + 1; j <= i__1; ++j) { + work[j] = snrm2_(&sm, &a[nfxd + 1 + j * a_dim1], &c__1); + work[*n + j] = work[j]; +/* L20: */ + } + + if (nb >= nbmin && nb < sminmn && nx < sminmn) { + +/* Use blocked code initially. */ + + j = nfxd + 1; + +/* Compute factorization: while loop. */ + + + topbmn = minmn - nx; +L30: + if (j <= topbmn) { +/* Computing MIN */ + i__1 = nb, i__2 = topbmn - j + 1; + jb = f2cmin(i__1,i__2); + +/* Factorize JB columns among columns J:N. */ + + i__1 = *n - j + 1; + i__2 = j - 1; + i__3 = *n - j + 1; + slaqps_(m, &i__1, &i__2, &jb, &fjb, &a[j * a_dim1 + 1], lda, & + jpvt[j], &tau[j], &work[j], &work[*n + j], &work[(*n + << 1) + 1], &work[(*n << 1) + jb + 1], &i__3); + + j += fjb; + goto L30; + } + } else { + j = nfxd + 1; + } + +/* Use unblocked code to factor the last or only block. */ + + + if (j <= minmn) { + i__1 = *n - j + 1; + i__2 = j - 1; + slaqp2_(m, &i__1, &i__2, &a[j * a_dim1 + 1], lda, &jpvt[j], &tau[ + j], &work[j], &work[*n + j], &work[(*n << 1) + 1]); + } + + } + + work[1] = (real) iws; + return 0; + +/* End of SGEQP3 */ + +} /* sgeqp3_ */ + diff --git a/lapack-netlib/SRC/sgeqr.c b/lapack-netlib/SRC/sgeqr.c new file mode 100644 index 000000000..6d268fd33 --- /dev/null +++ b/lapack-netlib/SRC/sgeqr.c @@ -0,0 +1,733 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQR */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, M, N, TSIZE, LWORK */ +/* REAL A( LDA, * ), T( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQR computes a QR factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a M-by-M orthogonal matrix; */ +/* > R is an upper-triangular N-by-N matrix; */ +/* > 0 is a (M-N)-by-N zero matrix, if M > N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R */ +/* > (R is upper triangular if M >= N); */ +/* > the elements below the diagonal are used to store part of the */ +/* > data structure to represent Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (MAX(5,TSIZE)) */ +/* > On exit, if INFO = 0, T(1) returns optimal (or either minimal */ +/* > or optimal, if query is assumed) TSIZE. See TSIZE for details. */ +/* > Remaining T contains part of the data structure used to represent Q. */ +/* > If one wants to apply or construct Q, then one needs to keep T */ +/* > (in addition to A) and pass it to further subroutines. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TSIZE */ +/* > \verbatim */ +/* > TSIZE is INTEGER */ +/* > If TSIZE >= 5, the dimension of the array T. */ +/* > If TSIZE = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If TSIZE = -1, the routine calculates optimal size of T for the */ +/* > optimum performance and returns this value in T(1). */ +/* > If TSIZE = -2, the routine calculates minimal size of T and */ +/* > returns this value in T(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */ +/* > or optimal, if query was assumed) LWORK. */ +/* > See LWORK for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1 or -2, then a workspace query is assumed. The routine */ +/* > only calculates the sizes of the T and WORK arrays, returns these */ +/* > values as the first entries of the T and WORK arrays, and no error */ +/* > message related to T or WORK is issued by XERBLA. */ +/* > If LWORK = -1, the routine calculates optimal size of WORK for the */ +/* > optimal performance and returns this value in WORK(1). */ +/* > If LWORK = -2, the routine calculates minimal size of WORK and */ +/* > returns this value in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \par Further Details */ +/* ==================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The goal of the interface is to give maximum freedom to the developers for */ +/* > creating any QR factorization algorithm they wish. The triangular */ +/* > (trapezoidal) R has to be stored in the upper part of A. The lower part of A */ +/* > and the array T can be used to store any relevant information for applying or */ +/* > constructing the Q factor. The WORK array can safely be discarded after exit. */ +/* > */ +/* > Caution: One should not expect the sizes of T and WORK to be the same from one */ +/* > LAPACK implementation to the other, or even from one execution to the other. */ +/* > A workspace query (for T and WORK) is needed at each execution. However, */ +/* > for a given execution, the size of T and WORK are fixed and will not change */ +/* > from one query to the next. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* > \par Further Details particular to this LAPACK implementation: */ +/* ============================================================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > These details are particular for this LAPACK implementation. Users should not */ +/* > take them for granted. These details may change in the future, and are not likely */ +/* > true for another LAPACK implementation. These details are relevant if one wants */ +/* > to try to understand the code. They are not part of the interface. */ +/* > */ +/* > In this version, */ +/* > */ +/* > T(2): row block size (MB) */ +/* > T(3): column block size (NB) */ +/* > T(6:TSIZE): data structure needed for Q, computed by */ +/* > SLATSQR or SGEQRT */ +/* > */ +/* > Depending on the matrix dimensions M and N, and row and column */ +/* > block sizes MB and NB returned by ILAENV, SGEQR will use either */ +/* > SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute */ +/* > the QR factorization. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqr_(integer *m, integer *n, real *a, integer *lda, + real *t, integer *tsize, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + logical mint, minw; + integer mb, nb, nblcks; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sgeqrt_(integer *, integer *, integer *, real + *, integer *, real *, integer *, real *, integer *); + logical lminws, lquery; + integer mintsz; + extern /* Subroutine */ int slatsqr_(integer *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , integer *); + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --t; + --work; + + /* Function Body */ + *info = 0; + + lquery = *tsize == -1 || *tsize == -2 || *lwork == -1 || *lwork == -2; + + mint = FALSE_; + minw = FALSE_; + if (*tsize == -2 || *lwork == -2) { + if (*tsize != -1) { + mint = TRUE_; + } + if (*lwork != -1) { + minw = TRUE_; + } + } + +/* Determine the block size */ + + if (f2cmin(*m,*n) > 0) { + mb = ilaenv_(&c__1, "SGEQR ", " ", m, n, &c__1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb = ilaenv_(&c__1, "SGEQR ", " ", m, n, &c__2, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + mb = *m; + nb = 1; + } + if (mb > *m || mb <= *n) { + mb = *m; + } + if (nb > f2cmin(*m,*n) || nb < 1) { + nb = 1; + } + mintsz = *n + 5; + if (mb > *n && *m > *n) { + if ((*m - *n) % (mb - *n) == 0) { + nblcks = (*m - *n) / (mb - *n); + } else { + nblcks = (*m - *n) / (mb - *n) + 1; + } + } else { + nblcks = 1; + } + +/* Determine if the workspace size satisfies minimal size */ + + lminws = FALSE_; +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n * nblcks + 5; + if ((*tsize < f2cmax(i__1,i__2) || *lwork < nb * *n) && *lwork >= *n && * + tsize >= mintsz && ! lquery) { +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2)) { + lminws = TRUE_; + nb = 1; + mb = *m; + } + if (*lwork < nb * *n) { + lminws = TRUE_; + nb = 1; + } + } + + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n * nblcks + 5; + if (*tsize < f2cmax(i__1,i__2) && ! lquery && ! lminws) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n * nb; + if (*lwork < f2cmax(i__1,i__2) && ! lquery && ! lminws) { + *info = -8; + } + } + } + + if (*info == 0) { + if (mint) { + t[1] = (real) mintsz; + } else { + t[1] = (real) (nb * *n * nblcks + 5); + } + t[2] = (real) mb; + t[3] = (real) nb; + if (minw) { + work[1] = (real) f2cmax(1,*n); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n; + work[1] = (real) f2cmax(i__1,i__2); + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQR", &i__1, (ftnlen)5); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (f2cmin(*m,*n) == 0) { + return 0; + } + +/* The QR Decomposition */ + + if (*m <= *n || mb <= *n || mb >= *m) { + sgeqrt_(m, n, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], info); + } else { + slatsqr_(m, n, &mb, &nb, &a[a_offset], lda, &t[6], &nb, &work[1], + lwork, info); + } + +/* Computing MAX */ + i__1 = 1, i__2 = nb * *n; + work[1] = (real) f2cmax(i__1,i__2); + + return 0; + +/* End of SGEQR */ + +} /* sgeqr_ */ + diff --git a/lapack-netlib/SRC/sgeqr2.c b/lapack-netlib/SRC/sgeqr2.c new file mode 100644 index 000000000..69eb614bc --- /dev/null +++ b/lapack-netlib/SRC/sgeqr2.c @@ -0,0 +1,602 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQR2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQR2 computes a QR factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a m-by-m orthogonal matrix; */ +/* > R is an upper-triangular n-by-n matrix; */ +/* > 0 is a (m-n)-by-n zero matrix, if m > n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n); the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *, ftnlen), slarfg_(integer *, real *, real *, + integer *, real *); + real aii; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQR2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGEQR2 */ + +} /* sgeqr2_ */ + diff --git a/lapack-netlib/SRC/sgeqr2p.c b/lapack-netlib/SRC/sgeqr2p.c new file mode 100644 index 000000000..ee2f98679 --- /dev/null +++ b/lapack-netlib/SRC/sgeqr2p.c @@ -0,0 +1,607 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagona +l elements using an unblocked algorithm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQR2P + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQR2P computes a QR factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a m-by-m orthogonal matrix; */ +/* > R is an upper-triangular n-by-n matrix with nonnegative diagonal */ +/* > entries; */ +/* > 0 is a (m-n)-by-n zero matrix, if m > n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(m,n) by n upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n). The diagonal entries of R */ +/* > are nonnegative; the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > See Lapack Working Note 203 for details */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqr2p_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *, ftnlen); + real aii; + extern /* Subroutine */ int slarfgp_(integer *, real *, real *, integer *, + real *); + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQR2P", &i__1, (ftnlen)7); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfgp_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * + a_dim1], &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGEQR2P */ + +} /* sgeqr2p_ */ + diff --git a/lapack-netlib/SRC/sgeqrf.c b/lapack-netlib/SRC/sgeqrf.c new file mode 100644 index 000000000..e642999f1 --- /dev/null +++ b/lapack-netlib/SRC/sgeqrf.c @@ -0,0 +1,700 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQRF computes a QR factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a M-by-M orthogonal matrix; */ +/* > R is an upper-triangular N-by-N matrix; */ +/* > 0 is a (M-N)-by-N zero matrix, if M > N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n); the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of f2cmin(m,n) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer + *, real *, real *, integer *); + integer ib, nb, nx; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the QR factorization of the current block */ +/* A(i:m,i:i+ib-1) */ + + i__3 = *m - i__ + 1; + sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *m - i__ + 1; + slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H**T to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGEQRF */ + +} /* sgeqrf_ */ + diff --git a/lapack-netlib/SRC/sgeqrfp.c b/lapack-netlib/SRC/sgeqrfp.c new file mode 100644 index 000000000..ef5362657 --- /dev/null +++ b/lapack-netlib/SRC/sgeqrfp.c @@ -0,0 +1,703 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQRFP */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQRFP + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQR2P computes a QR factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a M-by-M orthogonal matrix; */ +/* > R is an upper-triangular N-by-N matrix with nonnegative diagonal */ +/* > entries; */ +/* > 0 is a (M-N)-by-N zero matrix, if M > N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n). The diagonal entries of R */ +/* > are nonnegative; the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of f2cmin(m,n) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2019 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > See Lapack Working Note 203 for details */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqrfp_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo, ib, nb, nx; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + integer ldwork, lwkopt; + logical lquery; + extern /* Subroutine */ int sgeqr2p_(integer *, integer *, real *, + integer *, real *, real *, integer *); + integer iws; + + +/* -- LAPACK computational routine (version 3.9.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2019 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRFP", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the QR factorization of the current block */ +/* A(i:m,i:i+ib-1) */ + + i__3 = *m - i__ + 1; + sgeqr2p_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *m - i__ + 1; + slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H**T to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgeqr2p_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGEQRFP */ + +} /* sgeqrfp_ */ + diff --git a/lapack-netlib/SRC/sgeqrt.c b/lapack-netlib/SRC/sgeqrt.c new file mode 100644 index 000000000..6364bfee7 --- /dev/null +++ b/lapack-netlib/SRC/sgeqrt.c @@ -0,0 +1,628 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQRT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) */ + +/* INTEGER INFO, LDA, LDT, M, N, NB */ +/* REAL A( LDA, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQRT computes a blocked QR factorization of a real M-by-N matrix A */ +/* > using the compact WY representation of Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER */ +/* > The block size to be used in the blocked QR. MIN(M,N) >= NB >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix R (R is */ +/* > upper triangular if M >= N); the elements below the diagonal */ +/* > are the columns of V. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,MIN(M,N)) */ +/* > The upper triangular block reflectors stored in compact form */ +/* > as a sequence of upper triangular blocks. See below */ +/* > for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= NB. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (NB*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. */ +/* > */ +/* > Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each */ +/* > block is of order NB except for the last block, which is of order */ +/* > IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block */ +/* > reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB */ +/* > for the last block) T's are stored in the NB-by-K matrix T as */ +/* > */ +/* > T = (T1 T2 ... TB). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqrt_(integer *m, integer *n, integer *nb, real *a, + integer *lda, real *t, integer *ldt, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, k, iinfo, ib; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen), sgeqrt2_( + integer *, integer *, real *, integer *, real *, integer *, + integer *), sgeqrt3_(integer *, integer *, real *, integer *, + real *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nb < 1 || *nb > f2cmin(*m,*n) && f2cmin(*m,*n) > 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldt < *nb) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRT", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + k = f2cmin(*m,*n); + if (k == 0) { + return 0; + } + +/* Blocked loop of length K */ + + i__1 = k; + i__2 = *nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,*nb); + +/* Compute the QR factorization of the current block A(I:M,I:I+IB-1) */ + + if (TRUE_) { + i__3 = *m - i__ + 1; + sgeqrt3_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + + 1], ldt, &iinfo); + } else { + i__3 = *m - i__ + 1; + sgeqrt2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &t[i__ * t_dim1 + + 1], ldt, &iinfo); + } + if (i__ + ib <= *n) { + +/* Update by applying H**T to A(I:M,I+IB:N) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + i__5 = *n - i__ - ib + 1; + slarfb_("L", "T", "F", "C", &i__3, &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &t[i__ * t_dim1 + 1], ldt, &a[i__ + (i__ + + ib) * a_dim1], lda, &work[1], &i__5); + } + } + return 0; + +/* End of SGEQRT */ + +} /* sgeqrt_ */ + diff --git a/lapack-netlib/SRC/sgeqrt2.c b/lapack-netlib/SRC/sgeqrt2.c new file mode 100644 index 000000000..b778e6711 --- /dev/null +++ b/lapack-netlib/SRC/sgeqrt2.c @@ -0,0 +1,645 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY re +presentation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQRT2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQRT2( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, LDT, M, N */ +/* REAL A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQRT2 computes a QR factorization of a real M-by-N matrix A, */ +/* > using the compact WY representation of Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the real M-by-N matrix A. On exit, the elements on and */ +/* > above the diagonal contain the N-by-N upper triangular matrix R; the */ +/* > elements below the diagonal are the columns of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > where V**T is the transpose of V. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqrt2_(integer *m, integer *n, real *a, integer *lda, + real *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3; + + /* Local variables */ + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + integer i__, k; + real alpha; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), strmv_(char *, char *, char *, integer *, real *, + integer *, real *, integer *), xerbla_( + char *, integer *, ftnlen), slarfg_(integer *, real *, real *, + integer *, real *); + real aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRT2", &i__1, (ftnlen)7); + return 0; + } + + k = f2cmin(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[f2cmin(i__3,*m) + i__ * a_dim1] + , &c__1, &t[i__ + t_dim1]); + if (i__ < *n) { + +/* Apply H(i) to A(I:M,I+1:N) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + sgemv_("T", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b7, &t[*n * t_dim1 + + 1], &c__1); + +/* A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H */ + + alpha = -t[i__ + t_dim1]; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + sger_(&i__2, &i__3, &alpha, &a[i__ + i__ * a_dim1], &c__1, &t[*n * + t_dim1 + 1], &c__1, &a[i__ + (i__ + 1) * a_dim1], lda); + a[i__ + i__ * a_dim1] = aii; + } + } + + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* T(1:I-1,I) := alpha * A(I:M,1:I-1)**T * A(I:M,I) */ + + alpha = -t[i__ + t_dim1]; + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("T", &i__2, &i__3, &alpha, &a[i__ + a_dim1], lda, &a[i__ + i__ + * a_dim1], &c__1, &c_b7, &t[i__ * t_dim1 + 1], &c__1); + a[i__ + i__ * a_dim1] = aii; + +/* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) */ + + i__2 = i__ - 1; + strmv_("U", "N", "N", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], + &c__1); + +/* T(I,I) = tau(I) */ + + t[i__ + i__ * t_dim1] = t[i__ + t_dim1]; + t[i__ + t_dim1] = 0.f; + } + +/* End of SGEQRT2 */ + + return 0; +} /* sgeqrt2_ */ + diff --git a/lapack-netlib/SRC/sgeqrt3.c b/lapack-netlib/SRC/sgeqrt3.c new file mode 100644 index 000000000..7154346c4 --- /dev/null +++ b/lapack-netlib/SRC/sgeqrt3.c @@ -0,0 +1,678 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the c +ompact WY representation of Q. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGEQRT3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGEQRT3( M, N, A, LDA, T, LDT, INFO ) */ + +/* INTEGER INFO, LDA, M, N, LDT */ +/* REAL A( LDA, * ), T( LDT, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGEQRT3 recursively computes a QR factorization of a real M-by-N */ +/* > matrix A, using the compact WY representation of Q. */ +/* > */ +/* > Based on the algorithm of Elmroth and Gustavson, */ +/* > IBM J. Res. Develop. Vol 44 No. 4 July 2000. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the real M-by-N matrix A. On exit, the elements on and */ +/* > above the diagonal contain the N-by-N upper triangular matrix R; the */ +/* > elements below the diagonal are the columns of V. See below for */ +/* > further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,N) */ +/* > The N-by-N upper triangular factor of the block reflector. */ +/* > The elements on and above the diagonal contain the block */ +/* > reflector T; the elements below the diagonal are not used. */ +/* > See below for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix V stores the elementary reflectors H(i) in the i-th column */ +/* > below the diagonal. For example, if M=5 and N=3, the matrix V is */ +/* > */ +/* > V = ( 1 ) */ +/* > ( v1 1 ) */ +/* > ( v1 v2 1 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > where the vi's represent the vectors which define H(i), which are returned */ +/* > in the matrix A. The 1's along the diagonal of V are not stored in A. The */ +/* > block reflector H is then given by */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > where V**T is the transpose of V. */ +/* > */ +/* > For details of the algorithm, see Elmroth and Gustavson (cited above). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgeqrt3_(integer *m, integer *n, real *a, integer *lda, + real *t, integer *ldt, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + integer i1, j1, n1, n2; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *, ftnlen), slarfg_(integer *, real *, real *, integer *, real *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + } else if (*m < *n) { + *info = -1; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*ldt < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRT3", &i__1, (ftnlen)7); + return 0; + } + + if (*n == 1) { + +/* Compute Householder transform when N=1 */ + + slarfg_(m, &a[a_dim1 + 1], &a[f2cmin(2,*m) + a_dim1], &c__1, &t[t_dim1 + + 1]); + + } else { + +/* Otherwise, split A into blocks... */ + + n1 = *n / 2; + n2 = *n - n1; +/* Computing MIN */ + i__1 = n1 + 1; + j1 = f2cmin(i__1,*n); +/* Computing MIN */ + i__1 = *n + 1; + i1 = f2cmin(i__1,*m); + +/* Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H */ + + sgeqrt3_(m, &n1, &a[a_offset], lda, &t[t_offset], ldt, &iinfo); + +/* Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] */ + + i__1 = n2; + for (j = 1; j <= i__1; ++j) { + i__2 = n1; + for (i__ = 1; i__ <= i__2; ++i__) { + t[i__ + (j + n1) * t_dim1] = a[i__ + (j + n1) * a_dim1]; + } + } + strmm_("L", "L", "T", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = *m - n1; + sgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[j1 + a_dim1], lda, &a[j1 + + j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt); + + strmm_("L", "U", "T", "N", &n1, &n2, &c_b8, &t[t_offset], ldt, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = *m - n1; + sgemm_("N", "N", &i__1, &n2, &n1, &c_b20, &a[j1 + a_dim1], lda, &t[j1 + * t_dim1 + 1], ldt, &c_b8, &a[j1 + j1 * a_dim1], lda); + + strmm_("L", "L", "N", "U", &n1, &n2, &c_b8, &a[a_offset], lda, &t[j1 * + t_dim1 + 1], ldt) + ; + + i__1 = n2; + for (j = 1; j <= i__1; ++j) { + i__2 = n1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + (j + n1) * a_dim1] -= t[i__ + (j + n1) * t_dim1]; + } + } + +/* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H */ + + i__1 = *m - n1; + sgeqrt3_(&i__1, &n2, &a[j1 + j1 * a_dim1], lda, &t[j1 + j1 * t_dim1], + ldt, &iinfo); + +/* Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 */ + + i__1 = n1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = n2; + for (j = 1; j <= i__2; ++j) { + t[i__ + (j + n1) * t_dim1] = a[j + n1 + i__ * a_dim1]; + } + } + + strmm_("R", "L", "N", "U", &n1, &n2, &c_b8, &a[j1 + j1 * a_dim1], lda, + &t[j1 * t_dim1 + 1], ldt); + + i__1 = *m - *n; + sgemm_("T", "N", &n1, &n2, &i__1, &c_b8, &a[i1 + a_dim1], lda, &a[i1 + + j1 * a_dim1], lda, &c_b8, &t[j1 * t_dim1 + 1], ldt); + + strmm_("L", "U", "N", "N", &n1, &n2, &c_b20, &t[t_offset], ldt, &t[j1 + * t_dim1 + 1], ldt); + + strmm_("R", "U", "N", "N", &n1, &n2, &c_b8, &t[j1 + j1 * t_dim1], ldt, + &t[j1 * t_dim1 + 1], ldt); + +/* Y = (Y1,Y2); R = [ R1 A(1:N1,J1:N) ]; T = [T1 T3] */ +/* [ 0 R2 ] [ 0 T2] */ + + } + + return 0; + +/* End of SGEQRT3 */ + +} /* sgeqrt3_ */ + diff --git a/lapack-netlib/SRC/sgerfs.c b/lapack-netlib/SRC/sgerfs.c new file mode 100644 index 000000000..5fa56c801 --- /dev/null +++ b/lapack-netlib/SRC/sgerfs.c @@ -0,0 +1,879 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGERFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGERFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, */ +/* X, LDX, FERR, BERR, WORK, IWORK, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGERFS improves the computed solution to a system of linear */ +/* > equations and provides error bounds and backward error estimates for */ +/* > the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > The original N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is REAL array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by SGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from SGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by SGETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgerfs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, + integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2, i__3; + real r__1, r__2, r__3; + + /* Local variables */ + integer kase; + real safe1, safe2; + integer i__, j, k; + real s; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + integer count; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), saxpy_(integer *, real *, real *, integer *, real *, + integer *), slacn2_(integer *, real *, real *, integer *, real *, + integer *, integer *); + real xk; + extern real slamch_(char *); + integer nz; + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, + integer *, integer *, real *, integer *, integer *); + char transt[1]; + real lstres, eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldx < f2cmax(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGERFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.f; + berr[j] = 0.f; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = *n + 1; + eps = slamch_("Epsilon"); + safmin = slamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.f; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + scopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + sgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x[j * x_dim1 + 1], & + c__1, &c_b17, &work[*n + 1], &c__1); + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)); +/* L30: */ + } + +/* Compute abs(op(A))*abs(X) + abs(B). */ + + if (notran) { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + xk = (r__1 = x[k + j * x_dim1], abs(r__1)); + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + work[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * xk; +/* L40: */ + } +/* L50: */ + } + } else { + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + s = 0.f; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + s += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = x[ + i__ + j * x_dim1], abs(r__2)); +/* L60: */ + } + work[k] += s; +/* L70: */ + } + } + s = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + r__2 = s, r__3 = (r__1 = work[*n + i__], abs(r__1)) / work[ + i__]; + s = f2cmax(r__2,r__3); + } else { +/* Computing MAX */ + r__2 = s, r__3 = ((r__1 = work[*n + i__], abs(r__1)) + safe1) + / (work[i__] + safe1); + s = f2cmax(r__2,r__3); + } +/* L80: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) { + +/* Update solution and try again. */ + + sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n + + 1], n, info); + saxpy_(n, &c_b17, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use SLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * + work[i__] + safe1; + } +/* L90: */ + } + + kase = 0; +L100: + slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**T). */ + + sgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & + work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L110: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L120: */ + } + sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & + work[*n + 1], n, info); + } + goto L100; + } + +/* Normalize error. */ + + lstres = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], abs(r__1)); + lstres = f2cmax(r__2,r__3); +/* L130: */ + } + if (lstres != 0.f) { + ferr[j] /= lstres; + } + +/* L140: */ + } + + return 0; + +/* End of SGERFS */ + +} /* sgerfs_ */ + diff --git a/lapack-netlib/SRC/sgerfsx.c b/lapack-netlib/SRC/sgerfsx.c new file mode 100644 index 000000000..fb494094f --- /dev/null +++ b/lapack-netlib/SRC/sgerfsx.c @@ -0,0 +1,1146 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGERFSX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGERFSX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, */ +/* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER TRANS, EQUED */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS */ +/* REAL RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ X( LDX , * ), WORK( * ) */ +/* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGERFSX improves the computed solution to a system of linear */ +/* > equations and provides error bounds and backward error estimates */ +/* > for the solution. In addition to normwise error bound, the code */ +/* > provides maximum componentwise error bound if possible. See */ +/* > comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the */ +/* > error bounds. */ +/* > */ +/* > The original system of linear equations may have been equilibrated */ +/* > before calling this routine, as described by arguments EQUED, R */ +/* > and C below. In this case, the solution and error bounds returned */ +/* > are for the original unequilibrated system. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done to A */ +/* > before calling this routine. This is needed to compute */ +/* > the solution and error bounds correctly. */ +/* > = 'N': No equilibration */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > The right hand side B has been changed accordingly. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > The original N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is REAL array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by SGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from SGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] R */ +/* > \verbatim */ +/* > R is REAL array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. */ +/* > If R is accessed, each element of R should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. */ +/* > If C is accessed, each element of C should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by SGETRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is REAL array, dimension NPARAMS */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the double-precision refinement algorithm, */ +/* > possibly with doubled-single computations if the */ +/* > compilation environment does not support DOUBLE */ +/* > PRECISION. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgerfsx_(char *trans, char *equed, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + real *r__, real *c__, real *b, integer *ldb, real *x, integer *ldx, + real *rcond, real *berr, integer *n_err_bnds__, real *err_bnds_norm__, + real *err_bnds_comp__, integer *nparams, real *params, real *work, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + real r__1, r__2; + + /* Local variables */ + real illrcond_thresh__, unstable_thresh__; + extern /* Subroutine */ int sla_gerfsx_extended_(integer *, integer *, + integer *, integer *, real *, integer *, real *, integer *, + integer *, logical *, real *, real *, integer *, real *, integer * + , real *, integer *, real *, real *, real *, real *, real *, real + *, real *, integer *, real *, real *, logical *, integer *); + real err_lbnd__; + char norm[1]; + integer ref_type__; + extern integer ilatrans_(char *); + logical ignore_cwise__; + integer j; + extern logical lsame_(char *, char *); + real anorm, rcond_tmp__; + integer prec_type__; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgecon_( + char *, integer *, real *, integer *, real *, real *, real *, + integer *, integer *); + logical colequ, notran, rowequ; + integer trans_type__; + extern integer ilaprec_(char *); + extern real sla_gercond_(char *, integer *, real *, integer *, real *, + integer *, integer *, integer *, real *, integer *, real *, + integer *); + integer ithresh, n_norms__; + real rthresh, cwise_wrong__; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ================================================================== */ + + +/* Check the input parameters. */ + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + trans_type__ = ilatrans_(trans); + ref_type__ = 1; + if (*nparams >= 1) { + if (params[1] < 0.f) { + params[1] = 1.f; + } else { + ref_type__ = params[1]; + } + } + +/* Set default parameters. */ + + illrcond_thresh__ = (real) (*n) * slamch_("Epsilon"); + ithresh = 10; + rthresh = .5f; + unstable_thresh__ = .25f; + ignore_cwise__ = FALSE_; + + if (*nparams >= 2) { + if (params[2] < 0.f) { + params[2] = (real) ithresh; + } else { + ithresh = (integer) params[2]; + } + } + if (*nparams >= 3) { + if (params[3] < 0.f) { + if (ignore_cwise__) { + params[3] = 0.f; + } else { + params[3] = 1.f; + } + } else { + ignore_cwise__ = params[3] == 0.f; + } + } + if (ref_type__ == 0 || *n_err_bnds__ == 0) { + n_norms__ = 0; + } else if (ignore_cwise__) { + n_norms__ = 1; + } else { + n_norms__ = 2; + } + + notran = lsame_(trans, "N"); + rowequ = lsame_(equed, "R") || lsame_(equed, "B"); + colequ = lsame_(equed, "C") || lsame_(equed, "B"); + +/* Test input parameters. */ + + if (trans_type__ == -1) { + *info = -1; + } else if (! rowequ && ! colequ && ! lsame_(equed, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (*ldb < f2cmax(1,*n)) { + *info = -13; + } else if (*ldx < f2cmax(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGERFSX", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *nrhs == 0) { + *rcond = 1.f; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 0.f; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.f; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.f; + } + if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 0.f; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 0.f; + } + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 1.f; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 1.f; + } + } + return 0; + } + +/* Default to failure. */ + + *rcond = 0.f; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + berr[j] = 1.f; + if (*n_err_bnds__ >= 1) { + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.f; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.f; + } + if (*n_err_bnds__ >= 2) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.f; + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.f; + } + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = 0.f; + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = 0.f; + } + } + +/* Compute the norm of A and the reciprocal of the condition */ +/* number of A. */ + + if (notran) { + *(unsigned char *)norm = 'I'; + } else { + *(unsigned char *)norm = '1'; + } + anorm = slange_(norm, n, n, &a[a_offset], lda, &work[1]); + sgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], + info); + +/* Perform refinement on each right-hand side */ + + if (ref_type__ != 0) { + prec_type__ = ilaprec_("D"); + if (notran) { + sla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[ + a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &colequ, & + c__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], & + n_norms__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], & + work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, + &rthresh, &unstable_thresh__, &ignore_cwise__, info); + } else { + sla_gerfsx_extended_(&prec_type__, &trans_type__, n, nrhs, &a[ + a_offset], lda, &af[af_offset], ldaf, &ipiv[1], &rowequ, & + r__[1], &b[b_offset], ldb, &x[x_offset], ldx, &berr[1], & + n_norms__, &err_bnds_norm__[err_bnds_norm_offset], & + err_bnds_comp__[err_bnds_comp_offset], &work[*n + 1], & + work[1], &work[(*n << 1) + 1], &work[1], rcond, &ithresh, + &rthresh, &unstable_thresh__, &ignore_cwise__, info); + } + } +/* Computing MAX */ + r__1 = 10.f, r__2 = sqrt((real) (*n)); + err_lbnd__ = f2cmax(r__1,r__2) * slamch_("Epsilon"); + if (*n_err_bnds__ >= 1 && n_norms__ >= 1) { + +/* Compute scaled normwise condition number cond(A*C). */ + + if (colequ && notran) { + rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c_n1, &c__[1], info, &work[1] + , &iwork[1]); + } else if (rowequ && ! notran) { + rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c_n1, &r__[1], info, &work[1] + , &iwork[1]); + } else { + rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c__0, &r__[1], info, &work[1] + , &iwork[1]); + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_norm__[j + (err_bnds_norm_dim1 + << 1)] > 1.f) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.f; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = 1.f; + err_bnds_norm__[j + err_bnds_norm_dim1] = 0.f; + if (*info <= *n) { + *info = *n + j; + } + } else if (err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] < + err_lbnd__) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = err_lbnd__; + err_bnds_norm__[j + err_bnds_norm_dim1] = 1.f; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_norm__[j + err_bnds_norm_dim1 * 3] = rcond_tmp__; + } + } + } + if (*n_err_bnds__ >= 1 && n_norms__ >= 2) { + +/* Compute componentwise condition number cond(A*diag(Y(:,J))) for */ +/* each right-hand side using the current solution as an estimate of */ +/* the true solution. If the componentwise error estimate is too */ +/* large, then the solution is a lousy estimate of truth and the */ +/* estimated RCOND may be too optimistic. To avoid misleading users, */ +/* the inverse condition number is set to 0.0 when the estimated */ +/* cwise error is at least CWISE_WRONG. */ + + cwise_wrong__ = sqrt(slamch_("Epsilon")); + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + cwise_wrong__) { + rcond_tmp__ = sla_gercond_(trans, n, &a[a_offset], lda, &af[ + af_offset], ldaf, &ipiv[1], &c__1, &x[j * x_dim1 + 1], + info, &work[1], &iwork[1]); + } else { + rcond_tmp__ = 0.f; + } + +/* Cap the error at 1.0. */ + + if (*n_err_bnds__ >= 2 && err_bnds_comp__[j + (err_bnds_comp_dim1 + << 1)] > 1.f) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.f; + } + +/* Threshold the error (see LAWN). */ + + if (rcond_tmp__ < illrcond_thresh__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = 1.f; + err_bnds_comp__[j + err_bnds_comp_dim1] = 0.f; + if (params[3] == 1.f && *info < *n + j) { + *info = *n + j; + } + } else if (err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] < + err_lbnd__) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = err_lbnd__; + err_bnds_comp__[j + err_bnds_comp_dim1] = 1.f; + } + +/* Save the condition number. */ + + if (*n_err_bnds__ >= 3) { + err_bnds_comp__[j + err_bnds_comp_dim1 * 3] = rcond_tmp__; + } + } + } + + return 0; + +/* End of SGERFSX */ + +} /* sgerfsx_ */ + diff --git a/lapack-netlib/SRC/sgerq2.c b/lapack-netlib/SRC/sgerq2.c new file mode 100644 index 000000000..ec6d0a5e8 --- /dev/null +++ b/lapack-netlib/SRC/sgerq2.c @@ -0,0 +1,587 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGERQ2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGERQ2 computes an RQ factorization of a real m by n matrix A: */ +/* > A = R * Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, if m <= n, the upper triangle of the subarray */ +/* > A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; */ +/* > if m >= n, the elements on and above the (m-n)-th subdiagonal */ +/* > contain the m by n upper trapezoidal matrix R; the remaining */ +/* > elements, with the array TAU, represent the orthogonal matrix */ +/* > Q as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* > A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgerq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, k; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *, ftnlen), slarfg_(integer *, real *, real *, + integer *, real *); + real aii; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGERQ2", &i__1, (ftnlen)6); + return 0; + } + + k = f2cmin(*m,*n); + + for (i__ = k; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(m-k+i,1:n-k+i-1) */ + + i__1 = *n - k + i__; + slarfg_(&i__1, &a[*m - k + i__ + (*n - k + i__) * a_dim1], &a[*m - k + + i__ + a_dim1], lda, &tau[i__]); + +/* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right */ + + aii = a[*m - k + i__ + (*n - k + i__) * a_dim1]; + a[*m - k + i__ + (*n - k + i__) * a_dim1] = 1.f; + i__1 = *m - k + i__ - 1; + i__2 = *n - k + i__; + slarf_("Right", &i__1, &i__2, &a[*m - k + i__ + a_dim1], lda, &tau[ + i__], &a[a_offset], lda, &work[1]); + a[*m - k + i__ + (*n - k + i__) * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SGERQ2 */ + +} /* sgerq2_ */ + diff --git a/lapack-netlib/SRC/sgerqf.c b/lapack-netlib/SRC/sgerqf.c new file mode 100644 index 000000000..5a0024da9 --- /dev/null +++ b/lapack-netlib/SRC/sgerqf.c @@ -0,0 +1,711 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGERQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGERQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, M, N */ +/* REAL A( LDA, * ), TAU( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGERQF computes an RQ factorization of a real M-by-N matrix A: */ +/* > A = R * Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if m <= n, the upper triangle of the subarray */ +/* > A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; */ +/* > if m >= n, the elements on and above the (m-n)-th subdiagonal */ +/* > contain the M-by-N upper trapezoidal matrix R; */ +/* > the remaining elements, with the array TAU, represent the */ +/* > orthogonal matrix Q as a product of f2cmin(m,n) elementary */ +/* > reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M). */ +/* > For optimum performance LWORK >= M*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* > A(m-k+i,1:n-k+i-1), and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgerqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, nbmin, iinfo; + extern /* Subroutine */ int sgerq2_(integer *, integer *, real *, integer + *, real *, real *, integer *); + integer ib, nb, ki, kk, mu, nu, nx; + extern /* Subroutine */ int slarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + integer ldwork, lwkopt; + logical lquery; + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } else if (*lwork < f2cmax(1,*m) && ! lquery) { + *info = -7; + } + + if (*info == 0) { + k = f2cmin(*m,*n); + if (k == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *m * nb; + work[1] = (real) lwkopt; + } + work[1] = (real) lwkopt; + + if (*lwork < f2cmax(1,*m) && ! lquery) { + *info = -7; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGERQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + return 0; + } + + nbmin = 2; + nx = 1; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGERQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGERQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially. */ +/* The last kk rows are handled by the block method. */ + + ki = (k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = k, i__2 = ki + nb; + kk = f2cmin(i__1,i__2); + + i__1 = k - kk + 1; + i__2 = -nb; + for (i__ = k - kk + ki + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ + += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = f2cmin(i__3,nb); + +/* Compute the RQ factorization of the current block */ +/* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) */ + + i__3 = *n - k + i__ + ib - 1; + sgerq2_(&ib, &i__3, &a[*m - k + i__ + a_dim1], lda, &tau[i__], & + work[1], &iinfo); + if (*m - k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *n - k + i__ + ib - 1; + slarft_("Backward", "Rowwise", &i__3, &ib, &a[*m - k + i__ + + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ + + i__3 = *m - k + i__ - 1; + i__4 = *n - k + i__ + ib - 1; + slarfb_("Right", "No transpose", "Backward", "Rowwise", &i__3, + &i__4, &ib, &a[*m - k + i__ + a_dim1], lda, &work[1], + &ldwork, &a[a_offset], lda, &work[ib + 1], &ldwork); + } +/* L10: */ + } + mu = *m - k + i__ + nb - 1; + nu = *n - k + i__ + nb - 1; + } else { + mu = *m; + nu = *n; + } + +/* Use unblocked code to factor the last or only block */ + + if (mu > 0 && nu > 0) { + sgerq2_(&mu, &nu, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGERQF */ + +} /* sgerqf_ */ + diff --git a/lapack-netlib/SRC/sgesc2.c b/lapack-netlib/SRC/sgesc2.c new file mode 100644 index 000000000..6634cde8e --- /dev/null +++ b/lapack-netlib/SRC/sgesc2.c @@ -0,0 +1,604 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGESC2 solves a system of linear equations using the LU factorization with complete pivoting co +mputed by sgetc2. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESC2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) */ + +/* INTEGER LDA, N */ +/* REAL SCALE */ +/* INTEGER IPIV( * ), JPIV( * ) */ +/* REAL A( LDA, * ), RHS( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESC2 solves a system of linear equations */ +/* > */ +/* > A * X = scale* RHS */ +/* > */ +/* > with a general N-by-N matrix A using the LU factorization with */ +/* > complete pivoting computed by SGETC2. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the LU part of the factorization of the n-by-n */ +/* > matrix A computed by SGETC2: A = P * L * U * Q */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1, N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] RHS */ +/* > \verbatim */ +/* > RHS is REAL array, dimension (N). */ +/* > On entry, the right hand side vector b. */ +/* > On exit, the solution vector X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= i <= N, row i of the */ +/* > matrix has been interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JPIV */ +/* > \verbatim */ +/* > JPIV is INTEGER array, dimension (N). */ +/* > The pivot indices; for 1 <= j <= N, column j of the */ +/* > matrix has been interchanged with column JPIV(j). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is REAL */ +/* > On exit, SCALE contains the scale factor. SCALE is chosen */ +/* > 0 <= SCALE <= 1 to prevent overflow in the solution. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* > Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int sgesc2_(integer *n, real *a, integer *lda, real *rhs, + integer *ipiv, integer *jpiv, real *scale) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + real temp; + integer i__, j; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + slabad_(real *, real *); + extern real slamch_(char *); + real bignum; + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + real smlnum, eps; + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Set constant to control overflow */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --rhs; + --ipiv; + --jpiv; + + /* Function Body */ + eps = slamch_("P"); + smlnum = slamch_("S") / eps; + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Apply permutations IPIV to RHS */ + + i__1 = *n - 1; + slaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1); + +/* Solve for L part */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + rhs[j] -= a[j + i__ * a_dim1] * rhs[i__]; +/* L10: */ + } +/* L20: */ + } + +/* Solve for U part */ + + *scale = 1.f; + +/* Check for scaling */ + + i__ = isamax_(n, &rhs[1], &c__1); + if (smlnum * 2.f * (r__1 = rhs[i__], abs(r__1)) > (r__2 = a[*n + *n * + a_dim1], abs(r__2))) { + temp = .5f / (r__1 = rhs[i__], abs(r__1)); + sscal_(n, &temp, &rhs[1], &c__1); + *scale *= temp; + } + + for (i__ = *n; i__ >= 1; --i__) { + temp = 1.f / a[i__ + i__ * a_dim1]; + rhs[i__] *= temp; + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp); +/* L30: */ + } +/* L40: */ + } + +/* Apply permutations JPIV to the solution (RHS) */ + + i__1 = *n - 1; + slaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1); + return 0; + +/* End of SGESC2 */ + +} /* sgesc2_ */ + diff --git a/lapack-netlib/SRC/sgesdd.c b/lapack-netlib/SRC/sgesdd.c new file mode 100644 index 000000000..1429b40c1 --- /dev/null +++ b/lapack-netlib/SRC/sgesdd.c @@ -0,0 +1,2165 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGESDD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESDD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, */ +/* WORK, LWORK, IWORK, INFO ) */ + +/* CHARACTER JOBZ */ +/* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), S( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESDD computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, optionally computing the left and right singular */ +/* > vectors. If singular vectors are desired, it uses a */ +/* > divide-and-conquer algorithm. */ +/* > */ +/* > The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > Note that the routine returns VT = V**T, not V. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'A': all M columns of U and all N rows of V**T are */ +/* > returned in the arrays U and VT; */ +/* > = 'S': the first f2cmin(M,N) columns of U and the first */ +/* > f2cmin(M,N) rows of V**T are returned in the arrays U */ +/* > and VT; */ +/* > = 'O': If M >= N, the first N columns of U are overwritten */ +/* > on the array A and all rows of V**T are returned in */ +/* > the array VT; */ +/* > otherwise, all columns of U are returned in the */ +/* > array U and the first M rows of V**T are overwritten */ +/* > in the array A; */ +/* > = 'N': no columns of U or rows of V**T are computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if JOBZ = 'O', A is overwritten with the first N columns */ +/* > of U (the left singular vectors, stored */ +/* > columnwise) if M >= N; */ +/* > A is overwritten with the first M rows */ +/* > of V**T (the right singular vectors, stored */ +/* > rowwise) otherwise. */ +/* > if JOBZ .ne. 'O', the contents of A are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is REAL array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is REAL array, dimension (LDU,UCOL) */ +/* > UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; */ +/* > UCOL = f2cmin(M,N) if JOBZ = 'S'. */ +/* > If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M */ +/* > orthogonal matrix U; */ +/* > if JOBZ = 'S', U contains the first f2cmin(M,N) columns of U */ +/* > (the left singular vectors, stored columnwise); */ +/* > if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is REAL array, dimension (LDVT,N) */ +/* > If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the */ +/* > N-by-N orthogonal matrix V**T; */ +/* > if JOBZ = 'S', VT contains the first f2cmin(M,N) rows of */ +/* > V**T (the right singular vectors, stored rowwise); */ +/* > if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; */ +/* > if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; */ +/* > if JOBZ = 'S', LDVT >= f2cmin(M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1. */ +/* > If LWORK = -1, a workspace query is assumed. The optimal */ +/* > size for the WORK array is calculated and stored in WORK(1), */ +/* > and no other work except argument checking is performed. */ +/* > */ +/* > Let mx = f2cmax(M,N) and mn = f2cmin(M,N). */ +/* > If JOBZ = 'N', LWORK >= 3*mn + f2cmax( mx, 7*mn ). */ +/* > If JOBZ = 'O', LWORK >= 3*mn + f2cmax( mx, 5*mn*mn + 4*mn ). */ +/* > If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn. */ +/* > If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx. */ +/* > These are not tight minimums in all cases; see comments inside code. */ +/* > For good performance, LWORK should generally be larger; */ +/* > a query is recommended. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (8*f2cmin(M,N)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: SBDSDC did not converge, updating process failed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEsing */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a, + integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, + real *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Local variables */ + integer lwork_sgelqf_mn__, lwork_sgeqrf_mn__, iscl, lwork_sorglq_mn__, + lwork_sorglq_nn__; + real anrm; + integer idum[1], ierr, itau, lwork_sorgqr_mm__, lwork_sorgqr_mn__, + lwork_sormbr_qln_mm__, lwork_sormbr_qln_mn__, + lwork_sormbr_qln_nn__, lwork_sormbr_prt_mm__, + lwork_sormbr_prt_mn__, lwork_sormbr_prt_nn__, i__; + extern logical lsame_(char *, char *); + integer chunk; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + integer minmn, wrkbl, itaup, itauq, mnthr; + logical wntqa; + integer nwork; + logical wntqn, wntqo, wntqs; + integer ie, il, ir, bdspac, iu, lwork_sorgbr_p_mm__; + extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *, + real *, real *, integer *, real *, integer *, real *, integer *, + real *, integer *, integer *); + integer lwork_sorgbr_q_nn__; + extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *, integer *); + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *), sgeqrf_(integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), slacpy_(char + *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + extern logical sisnan_(real *); + extern /* Subroutine */ int sorgbr_(char *, integer *, integer *, integer + *, real *, integer *, real *, real *, integer *, integer *); + integer ldwrkl; + extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + integer ldwkvt; + real smlnum; + logical wntqas; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + logical lquery; + integer blk; + real dum[1], eps; + integer ivt, lwork_sgebrd_mm__, lwork_sgebrd_mn__, lwork_sgebrd_nn__; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* Compute workspace */ +/* Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace allocated at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + bdspac = 0; + mnthr = (integer) (minmn * 11.f / 6.f); + if (*m >= *n && minmn > 0) { + +/* Compute space needed for SBDSDC */ + + if (wntqn) { +/* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) */ +/* keep 7*N for backwards compatibility. */ + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + +/* Compute space preferred for each routine */ + sgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_sgebrd_mn__ = (integer) dum[0]; + + sgebrd_(n, n, dum, n, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_sgebrd_nn__ = (integer) dum[0]; + + sgeqrf_(m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_sgeqrf_mn__ = (integer) dum[0]; + + sorgbr_("Q", n, n, n, dum, n, dum, dum, &c_n1, &ierr); + lwork_sorgbr_q_nn__ = (integer) dum[0]; + + sorgqr_(m, m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_sorgqr_mm__ = (integer) dum[0]; + + sorgqr_(m, n, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_sorgqr_mn__ = (integer) dum[0]; + + sormbr_("P", "R", "T", n, n, n, dum, n, dum, dum, n, dum, &c_n1, & + ierr); + lwork_sormbr_prt_nn__ = (integer) dum[0]; + + sormbr_("Q", "L", "N", n, n, n, dum, n, dum, dum, n, dum, &c_n1, & + ierr); + lwork_sormbr_qln_nn__ = (integer) dum[0]; + + sormbr_("Q", "L", "N", m, n, n, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_sormbr_qln_mn__ = (integer) dum[0]; + + sormbr_("Q", "L", "N", m, m, n, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_sormbr_qln_mm__ = (integer) dum[0]; + + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M >> N, JOBZ='N') */ + + wrkbl = *n + lwork_sgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = f2cmax(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M >> N, JOBZ='O') */ + + wrkbl = *n + lwork_sgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_sorgqr_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M >> N, JOBZ='S') */ + + wrkbl = *n + lwork_sgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_sorgqr_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M >> N, JOBZ='A') */ + + wrkbl = *n + lwork_sgeqrf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + lwork_sorgqr_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sgebrd_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *n * *n; +/* Computing MAX */ + i__1 = *n * 3 + bdspac, i__2 = *n + *m; + minwrk = *n * *n + f2cmax(i__1,i__2); + } + } else { + +/* Path 5 (M >= N, but not much larger) */ + + wrkbl = *n * 3 + lwork_sgebrd_mn__; + if (wntqn) { +/* Path 5n (M >= N, jobz='N') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *n * 3 + f2cmax(*m,bdspac); + } else if (wntqo) { +/* Path 5o (M >= N, jobz='O') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + f2cmax(i__1,i__2); + } else if (wntqs) { +/* Path 5s (M >= N, jobz='S') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *n * 3 + f2cmax(*m,bdspac); + } else if (wntqa) { +/* Path 5a (M >= N, jobz='A') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + lwork_sormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *n * 3 + f2cmax(*m,bdspac); + } + } + } else if (minmn > 0) { + +/* Compute space needed for SBDSDC */ + + if (wntqn) { +/* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) */ +/* keep 7*N for backwards compatibility. */ + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + +/* Compute space preferred for each routine */ + sgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_sgebrd_mn__ = (integer) dum[0]; + + sgebrd_(m, m, &a[a_offset], m, &s[1], dum, dum, dum, dum, &c_n1, & + ierr); + lwork_sgebrd_mm__ = (integer) dum[0]; + + sgelqf_(m, n, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_sgelqf_mn__ = (integer) dum[0]; + + sorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_sorglq_nn__ = (integer) dum[0]; + + sorglq_(m, n, m, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_sorglq_mn__ = (integer) dum[0]; + + sorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr); + lwork_sorgbr_p_mm__ = (integer) dum[0]; + + sormbr_("P", "R", "T", m, m, m, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_sormbr_prt_mm__ = (integer) dum[0]; + + sormbr_("P", "R", "T", m, n, m, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_sormbr_prt_mn__ = (integer) dum[0]; + + sormbr_("P", "R", "T", n, n, m, dum, n, dum, dum, n, dum, &c_n1, & + ierr); + lwork_sormbr_prt_nn__ = (integer) dum[0]; + + sormbr_("Q", "L", "N", m, m, m, dum, m, dum, dum, m, dum, &c_n1, & + ierr); + lwork_sormbr_qln_mm__ = (integer) dum[0]; + + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N >> M, JOBZ='N') */ + + wrkbl = *m + lwork_sgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = f2cmax(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N >> M, JOBZ='O') */ + + wrkbl = *m + lwork_sgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_sorglq_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N >> M, JOBZ='S') */ + + wrkbl = *m + lwork_sgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_sorglq_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N >> M, JOBZ='A') */ + + wrkbl = *m + lwork_sgelqf_mn__; +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + lwork_sorglq_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sgebrd_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *m; +/* Computing MAX */ + i__1 = *m * 3 + bdspac, i__2 = *m + *n; + minwrk = *m * *m + f2cmax(i__1,i__2); + } + } else { + +/* Path 5t (N > M, but not much larger) */ + + wrkbl = *m * 3 + lwork_sgebrd_mn__; + if (wntqn) { +/* Path 5tn (N > M, jobz='N') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *m * 3 + f2cmax(*n,bdspac); + } else if (wntqo) { +/* Path 5to (N > M, jobz='O') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = f2cmax(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + f2cmax(i__1,i__2); + } else if (wntqs) { +/* Path 5ts (N > M, jobz='S') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_mn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *m * 3 + f2cmax(*n,bdspac); + } else if (wntqa) { +/* Path 5ta (N > M, jobz='A') */ +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_qln_mm__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + lwork_sormbr_prt_nn__; + wrkbl = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = f2cmax(i__1,i__2); + minwrk = *m * 3 + f2cmax(*n,bdspac); + } + } + } + maxwrk = f2cmax(maxwrk,minwrk); + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESDD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, dum); + if (sisnan_(&anrm)) { + *info = -4; + return 0; + } + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntqn) { + +/* Path 1 (M >> N, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* Workspace: need N [tau] + N [work] */ +/* Workspace: prefer N [tau] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b63, &c_b63, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* Workspace: need 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* Perform bidiagonal SVD, computing singular values only */ +/* Workspace: need N [e] + BDSPAC */ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2 (M >> N, JOBZ = 'O') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b63, &c_b63, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC */ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite WORK(IU) by left singular vectors of R */ +/* and VT by right singular vectors of R */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in WORK(IR) and copying to A */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] */ +/* Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U] */ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = f2cmin(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b63, &work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* Path 3 (M >> N, JOBZ='S') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* Compute A=Q*R */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b63, &c_b63, &work[ir + 1], & + ldwrkr); + +/* Generate Q in A */ +/* Workspace: need N*N [R] + N [tau] + N [work] */ +/* Workspace: prefer N*N [R] + N [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagoal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC */ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of R and VT */ +/* by right singular vectors of R */ +/* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* Workspace: need N*N [R] */ + + slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + sgemm_("N", "N", m, n, n, &c_b84, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b63, &u[u_offset], ldu); + + } else if (wntqa) { + +/* Path 4 (M >> N, JOBZ='A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* Workspace: need N*N [U] + N [tau] + N [work] */ +/* Workspace: prefer N*N [U] + N [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* Generate Q in U */ +/* Workspace: need N*N [U] + N [tau] + M [work] */ +/* Workspace: prefer N*N [U] + N [tau] + M*NB [work] */ + i__2 = *lwork - nwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b63, &c_b63, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC */ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite WORK(IU) by left singular vectors of R and VT */ +/* by right singular vectors of R */ +/* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* Workspace: need N*N [U] */ + + sgemm_("N", "N", m, n, n, &c_b84, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b63, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* M .LT. MNTHR */ + +/* Path 5 (M >= N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize A */ +/* Workspace: need 3*N [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Path 5n (M >= N, JOBZ='N') */ +/* Perform bidiagonal SVD, only computing singular values */ +/* Workspace: need 3*N [e, tauq, taup] + BDSPAC */ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { +/* Path 5o (M >= N, JOBZ='O') */ + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + slaset_("F", m, n, &c_b63, &c_b63, &work[iu], &ldwrku); +/* IR is unused; silence compile warnings */ + ir = -1; + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in WORK(IU) and computing right */ +/* singular vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC */ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* Overwrite VT by right singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* Path 5o-fast */ +/* Overwrite WORK(IU) by left singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A from WORK(IU) to A */ + + slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* Path 5o-slow */ +/* Generate Q in A */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* Multiply Q in A by left singular vectors of */ +/* bidiagonal matrix in WORK(IU), storing result in */ +/* WORK(IR) and copying to A */ +/* Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R] */ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = f2cmin(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b84, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b63, & + work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* Path 5s (M >= N, JOBZ='S') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*N [e, tauq, taup] + BDSPAC */ + + slaset_("F", m, n, &c_b63, &c_b63, &u[u_offset], ldu); + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* Path 5a (M >= N, JOBZ='A') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*N [e, tauq, taup] + BDSPAC */ + + slaset_("F", m, m, &c_b63, &c_b63, &u[u_offset], ldu); + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + slaset_("F", &i__1, &i__2, &c_b63, &c_b84, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*N [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*N [e, tauq, taup] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntqn) { + +/* Path 1t (N >> M, JOBZ='N') */ +/* No singular vectors to be computed */ + + itau = 1; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* Workspace: need M [tau] + M [work] */ +/* Workspace: prefer M [tau] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b63, &c_b63, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* Workspace: need 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* Perform bidiagonal SVD, computing singular values only */ +/* Workspace: need M [e] + BDSPAC */ + + sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* Path 2t (N >> M, JOBZ='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + +/* WORK(IVT) is M by M */ +/* WORK(IL) is M by M; it is later resized to M by chunk for gemm */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b63, &c_b63, &work[il + ldwrkl], + &ldwrkl); + +/* Generate Q in A */ +/* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U, and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC */ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of L and WORK(IVT) */ +/* by right singular vectors of L */ +/* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* Multiply right singular vectors of L in WORK(IVT) by Q */ +/* in A, storing result in WORK(IL) and copying to A */ +/* Workspace: need M*M [VT] + M*M [L] */ +/* Workspace: prefer M*M [VT] + M*N [L] */ +/* At this point, L is resized as M by chunk. */ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = f2cmin(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b63, &work[il], & + ldwrkl); + slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* Path 3t (N >> M, JOBZ='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* Compute A=L*Q */ +/* Workspace: need M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [L] + M [tau] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b63, &c_b63, &work[il + ldwrkl], + &ldwrkl); + +/* Generate Q in A */ +/* Workspace: need M*M [L] + M [tau] + M [work] */ +/* Workspace: prefer M*M [L] + M [tau] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU). */ +/* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC */ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of L and VT */ +/* by right singular vectors of L */ +/* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* Multiply right singular vectors of L in WORK(IL) by */ +/* Q in A, storing result in VT */ +/* Workspace: need M*M [L] */ + + slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + sgemm_("N", "N", m, n, m, &c_b84, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b63, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* Path 4t (N >> M, JOBZ='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* Workspace: need M*M [VT] + M [tau] + M [work] */ +/* Workspace: prefer M*M [VT] + M [tau] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* Generate Q in VT */ +/* Workspace: need M*M [VT] + M [tau] + N [work] */ +/* Workspace: prefer M*M [VT] + M [tau] + N*NB [work] */ + + i__2 = *lwork - nwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b63, &c_b63, &a[(a_dim1 << 1) + + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC */ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* Overwrite U by left singular vectors of L and WORK(IVT) */ +/* by right singular vectors of L */ +/* Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work] */ +/* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* Multiply right singular vectors of L in WORK(IVT) by */ +/* Q in VT, storing result in A */ +/* Workspace: need M*M [VT] */ + + sgemm_("N", "N", m, n, m, &c_b84, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b63, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* N .LT. MNTHR */ + +/* Path 5t (N > M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A */ +/* Workspace: need 3*M [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work] */ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* Path 5tn (N > M, JOBZ='N') */ +/* Perform bidiagonal SVD, only computing singular values */ +/* Workspace: need 3*M [e, tauq, taup] + BDSPAC */ + + sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { +/* Path 5to (N > M, JOBZ='O') */ + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + slaset_("F", m, n, &c_b63, &c_b63, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; +/* IL is unused; silence compile warnings */ + il = -1; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in WORK(IVT) */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC */ + + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* Overwrite U by left singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* Path 5to-fast */ +/* Overwrite WORK(IVT) by left singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + slacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* Path 5to-slow */ +/* Generate P**T in A */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] */ + + i__2 = *lwork - nwork + 1; + sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* Multiply Q in A by right singular vectors of */ +/* bidiagonal matrix in WORK(IVT), storing result in */ +/* WORK(IL) and copying to A */ +/* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L] */ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = f2cmin(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b84, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b63, & + work[il], m); + slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* Path 5ts (N > M, JOBZ='S') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*M [e, tauq, taup] + BDSPAC */ + + slaset_("F", m, n, &c_b63, &c_b63, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + M [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + M*NB [work] */ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* Path 5ta (N > M, JOBZ='A') */ +/* Perform bidiagonal SVD, computing left singular vectors */ +/* of bidiagonal matrix in U and computing right singular */ +/* vectors of bidiagonal matrix in VT */ +/* Workspace: need 3*M [e, tauq, taup] + BDSPAC */ + + slaset_("F", n, n, &c_b63, &c_b63, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + slaset_("F", &i__1, &i__2, &c_b63, &c_b84, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + } + +/* Overwrite U by left singular vectors of A and VT */ +/* by right singular vectors of A */ +/* Workspace: need 3*M [e, tauq, taup] + N [work] */ +/* Workspace: prefer 3*M [e, tauq, taup] + N*NB [work] */ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (real) maxwrk; + + return 0; + +/* End of SGESDD */ + +} /* sgesdd_ */ + diff --git a/lapack-netlib/SRC/sgesv.c b/lapack-netlib/SRC/sgesv.c new file mode 100644 index 000000000..694ba139e --- /dev/null +++ b/lapack-netlib/SRC/sgesv.c @@ -0,0 +1,576 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGESV computes the solution to system of linear equations A * X = B for GE matrices (simpl +e driver) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* REAL A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESV computes the solution to a real system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > The LU decomposition with partial pivoting and row interchanges is */ +/* > used to factor A as */ +/* > A = P * L * U, */ +/* > where P is a permutation matrix, L is unit lower triangular, and U is */ +/* > upper triangular. The factored form of A is then used to solve the */ +/* > system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N coefficient matrix A. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices that define the permutation matrix P; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS matrix of right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, + integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgetrf_( + integer *, integer *, real *, integer *, integer *, integer *), + sgetrs_(char *, integer *, integer *, real *, integer *, integer * + , real *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESV ", &i__1, (ftnlen)5); + return 0; + } + +/* Compute the LU factorization of A. */ + + sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of SGESV */ + +} /* sgesv_ */ + diff --git a/lapack-netlib/SRC/sgesvd.c b/lapack-netlib/SRC/sgesvd.c new file mode 100644 index 000000000..33df4bf82 --- /dev/null +++ b/lapack-netlib/SRC/sgesvd.c @@ -0,0 +1,4471 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGESVD computes the singular value decomposition (SVD) for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */ +/* WORK, LWORK, INFO ) */ + +/* CHARACTER JOBU, JOBVT */ +/* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */ +/* REAL A( LDA, * ), S( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESVD computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors. The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > Note that the routine returns V**T, not V. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'A': all M columns of U are returned in array U: */ +/* > = 'S': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) are returned in the array U; */ +/* > = 'O': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no columns of U (no left singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVT */ +/* > \verbatim */ +/* > JOBVT is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix */ +/* > V**T: */ +/* > = 'A': all N rows of V**T are returned in the array VT; */ +/* > = 'S': the first f2cmin(m,n) rows of V**T (the right singular */ +/* > vectors) are returned in the array VT; */ +/* > = 'O': the first f2cmin(m,n) rows of V**T (the right singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no rows of V**T (no right singular vectors) are */ +/* > computed. */ +/* > */ +/* > JOBVT and JOBU cannot both be 'O'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if JOBU = 'O', A is overwritten with the first f2cmin(m,n) */ +/* > columns of U (the left singular vectors, */ +/* > stored columnwise); */ +/* > if JOBVT = 'O', A is overwritten with the first f2cmin(m,n) */ +/* > rows of V**T (the right singular vectors, */ +/* > stored rowwise); */ +/* > if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */ +/* > are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is REAL array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is REAL array, dimension (LDU,UCOL) */ +/* > (LDU,M) if JOBU = 'A' or (LDU,f2cmin(M,N)) if JOBU = 'S'. */ +/* > If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */ +/* > if JOBU = 'S', U contains the first f2cmin(m,n) columns of U */ +/* > (the left singular vectors, stored columnwise); */ +/* > if JOBU = 'N' or 'O', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBU = 'S' or 'A', LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is REAL array, dimension (LDVT,N) */ +/* > If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */ +/* > V**T; */ +/* > if JOBVT = 'S', VT contains the first f2cmin(m,n) rows of */ +/* > V**T (the right singular vectors, stored rowwise); */ +/* > if JOBVT = 'N' or 'O', VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; if */ +/* > JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= f2cmin(M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */ +/* > superdiagonal elements of an upper bidiagonal matrix B */ +/* > whose diagonal is in S (not necessarily sorted). B */ +/* > satisfies A = U * B * VT, so it has the same singular values */ +/* > as A, and singular vectors related by U and VT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): */ +/* > - PATH 1 (M much larger than N, JOBU='N') */ +/* > - PATH 1t (N much larger than M, JOBVT='N') */ +/* > LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if SBDSQR did not converge, INFO specifies how many */ +/* > superdiagonals of an intermediate bidiagonal form B */ +/* > did not converge to zero. See the description of WORK */ +/* > above for details. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup realGEsing */ + +/* ===================================================================== */ +/* Subroutine */ int sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, + integer *ldvt, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3, i__4; + char ch__1[2]; + + /* Local variables */ + integer iscl; + real anrm; + integer ierr, itau, ncvt, nrvt, lwork_sgebrd__, lwork_sgelqf__, + lwork_sgeqrf__, i__; + extern logical lsame_(char *, char *); + integer chunk; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + integer minmn, wrkbl, itaup, itauq, mnthr, iwork; + logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs; + integer ie, ir, bdspac, iu; + extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *, integer *); + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *), sgeqrf_(integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), slacpy_(char + *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sbdsqr_(char *, integer *, integer *, + integer *, integer *, real *, real *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), sorgbr_( + char *, integer *, integer *, integer *, real *, integer *, real * + , real *, integer *, integer *), sormbr_(char *, char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , real *, integer *, real *, integer *, integer *); + integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + real smlnum; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + logical lquery, wntuas, wntvas; + integer blk, lwork_sorgbr_p__, lwork_sorgbr_q__, lwork_sorglq_m__, + lwork_sorglq_n__, ncu, lwork_sorgqr_n__, lwork_sorgqr_m__; + real eps, dum[1]; + integer nru; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + wntua = lsame_(jobu, "A"); + wntus = lsame_(jobu, "S"); + wntuas = wntua || wntus; + wntuo = lsame_(jobu, "O"); + wntun = lsame_(jobu, "N"); + wntva = lsame_(jobvt, "A"); + wntvs = lsame_(jobvt, "S"); + wntvas = wntva || wntvs; + wntvo = lsame_(jobvt, "O"); + wntvn = lsame_(jobvt, "N"); + lquery = *lwork == -1; + + if (! (wntua || wntus || wntuo || wntun)) { + *info = -1; + } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else if (*ldu < 1 || wntuas && *ldu < *m) { + *info = -9; + } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Compute space needed for SBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + bdspac = *n * 5; +/* Compute space needed for SGEQRF */ + sgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_sgeqrf__ = (integer) dum[0]; +/* Compute space needed for SORGQR */ + sorgqr_(m, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_sorgqr_n__ = (integer) dum[0]; + sorgqr_(m, m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_sorgqr_m__ = (integer) dum[0]; +/* Compute space needed for SGEBRD */ + sgebrd_(n, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, + &ierr); + lwork_sgebrd__ = (integer) dum[0]; +/* Compute space needed for SORGBR P */ + sorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_sorgbr_p__ = (integer) dum[0]; +/* Compute space needed for SORGBR Q */ + sorgbr_("Q", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_sorgbr_q__ = (integer) dum[0]; + + if (*m >= mnthr) { + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ + + maxwrk = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_sgebrd__; + maxwrk = f2cmax(i__2,i__3); + if (wntvo || wntvas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n << 2; + minwrk = f2cmax(i__2,bdspac); + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntus && wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntus && wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntus && wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntua && wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntua && wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } else if (wntua && wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_sgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } + } else { + +/* Path 10 (M at least N, but not much larger) */ + + sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, & + c_n1, &ierr); + lwork_sgebrd__ = (integer) dum[0]; + maxwrk = *n * 3 + lwork_sgebrd__; + if (wntus || wntuo) { + sorgbr_("Q", m, n, n, &a[a_offset], lda, dum, dum, &c_n1, + &ierr); + lwork_sorgbr_q__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + if (wntua) { + sorgbr_("Q", m, m, n, &a[a_offset], lda, dum, dum, &c_n1, + &ierr); + lwork_sorgbr_q__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + if (! wntvn) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = f2cmax(i__2,bdspac); + } + } else if (minmn > 0) { + +/* Compute space needed for SBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + bdspac = *m * 5; +/* Compute space needed for SGELQF */ + sgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_sgelqf__ = (integer) dum[0]; +/* Compute space needed for SORGLQ */ + sorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_sorglq_n__ = (integer) dum[0]; + sorglq_(m, n, m, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_sorglq_m__ = (integer) dum[0]; +/* Compute space needed for SGEBRD */ + sgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, + &ierr); + lwork_sgebrd__ = (integer) dum[0]; +/* Compute space needed for SORGBR P */ + sorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr); + lwork_sorgbr_p__ = (integer) dum[0]; +/* Compute space needed for SORGBR Q */ + sorgbr_("Q", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr); + lwork_sorgbr_q__ = (integer) dum[0]; + if (*n >= mnthr) { + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ + + maxwrk = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_sgebrd__; + maxwrk = f2cmax(i__2,i__3); + if (wntuo || wntuas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m << 2; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='O') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvs && wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntvs && wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + maxwrk = f2cmax(maxwrk,minwrk); + } else if (wntvs && wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntva && wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntva && wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } else if (wntva && wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ + + wrkbl = *m + lwork_sgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__; + wrkbl = f2cmax(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__; + wrkbl = f2cmax(i__2,i__3); + wrkbl = f2cmax(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } + } else { + +/* Path 10t(N greater than M, but not much larger) */ + + sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, & + c_n1, &ierr); + lwork_sgebrd__ = (integer) dum[0]; + maxwrk = *m * 3 + lwork_sgebrd__; + if (wntvs || wntvo) { +/* Compute space needed for SORGBR P */ + sorgbr_("P", m, n, m, &a[a_offset], n, dum, dum, &c_n1, & + ierr); + lwork_sorgbr_p__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + if (wntva) { + sorgbr_("P", n, n, m, &a[a_offset], n, dum, dum, &c_n1, & + ierr); + lwork_sorgbr_p__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__; + maxwrk = f2cmax(i__2,i__3); + } + if (! wntun) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__; + maxwrk = f2cmax(i__2,i__3); + } + maxwrk = f2cmax(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = f2cmax(i__2,bdspac); + } + } + maxwrk = f2cmax(maxwrk,minwrk); + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("SGESVD", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ +/* No left singular vectors to be computed */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out below R */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[a_dim1 + 2], + lda); + } + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + ncvt = 0; + if (wntvo || wntvas) { + +/* If right singular vectors desired, generate P'. */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__2, &ierr); + ncvt = *n; + } + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], + info); + +/* If right singular vectors desired in VT, copy them there */ + + if (wntvas) { + slacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + } + + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ +/* N left singular vectors to be overwritten on A and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= f2cmax(i__2,i__3) + *n * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to WORK(IR) and zero out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 1], + &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate left vectors bidiagonalizing R */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N+BDSPAC) */ + + sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, & + c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork] + , info); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ + + i__2 = *m; + i__3 = ldwrku; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = f2cmin(i__4,ldwrku); + sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b57, & + work[iu], &ldwrku); + slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ + + i__3 = *lwork - iwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing A */ +/* (Workspace: need 4*N, prefer 3*N+N*NB) */ + + i__3 = *lwork - iwork + 1; + sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__3, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info); + + } + + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__3 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= f2cmax(i__3,i__2) + *n * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__3 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy R to VT, zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__3 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__3, &i__2, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__3 = *lwork - iwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT, copying result to WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__3 = *lwork - iwork + 1; + sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__3, & + ierr); + slacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], & + ldwrkr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__3 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */ + + i__3 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__3, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) and computing right */ +/* singular vectors of R in VT */ +/* (Workspace: need N*N+BDSPAC) */ + + sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N+2*N, prefer N*N+M*N+N) */ + + i__3 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = f2cmin(i__4,ldwrku); + sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b57, & + work[iu], &ldwrku); + slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + +/* Generate Q in A */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__2, & + ierr); + +/* Multiply Q in A by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, & + work[itauq], &a[a_offset], lda, &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntus) { + + if (wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ +/* N left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + 1], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N+BDSPAC) */ + + sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* (Workspace: need N*N) */ + + sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, & + work[ir], &ldwrkr, &c_b57, &u[u_offset], ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N+4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N+4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N+BDSPAC) */ + + sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b57, &u[u_offset], ldu); + +/* Copy right singular vectors of R to A */ +/* (Workspace: need N*N) */ + + slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right vectors bidiagonalizing R in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info); + + } + + } else if (wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' */ +/* or 'A') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N+4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (Workspace: need N*N+BDSPAC) */ + + sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b57, &u[u_offset], ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } else if (wntua) { + + if (wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ +/* M left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + 1], &ldwrkr); + +/* Generate Q in U */ +/* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N+BDSPAC) */ + + sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IR), storing result in A */ +/* (Workspace: need N*N) */ + + sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[ir], &ldwrkr, &c_b57, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N+M, prefer N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N+4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N+4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N+BDSPAC) */ + + sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b57, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Copy right singular vectors of R from WORK(IR) to A */ + + slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N+M, prefer N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr) + ; + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info); + + } + + } else if (wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' */ +/* or 'A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N+4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (Workspace: need N*N+BDSPAC) */ + + sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b57, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], + ldu); + +/* Generate Q in U */ +/* (Workspace: need N+M, prefer N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R from A to VT, zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N+M, prefer 3*N+M*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } + + } else { + +/* M .LT. MNTHR */ + +/* Path 10 (M at least N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */ + + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + if (wntus) { + ncu = *n; + } + if (wntua) { + ncu = *m; + } + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*N, prefer 3*N+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + iwork = ie + *n; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + } + + } + + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ +/* No right singular vectors to be computed */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + if (wntuo || wntuas) { + +/* If left singular vectors desired, generate Q */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__2, &ierr); + } + iwork = ie + *m; + nru = 0; + if (wntuo || wntuas) { + nru = *m; + } + +/* Perform bidiagonal QR iteration, computing left singular */ +/* vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info); + +/* If left singular vectors desired in U, copy them there */ + + if (wntuas) { + slacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= f2cmax(i__2,i__3) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to WORK(IR) and zero out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate right vectors bidiagonalizing L */ +/* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork] + , info); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M+2*M, prefer M*M+M*N+M) */ + + i__2 = *n; + i__3 = chunk; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = f2cmin(i__4,chunk); + sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, & + work[iu], &ldwrku); + slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L30: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__3 = *lwork - iwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__3 = *lwork - iwork + 1; + sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__3, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[ + iwork], info); + + } + + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__3 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= f2cmax(i__3,i__2) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__3 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy L to U, zeroing about above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__3 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__3, &i__2, &c_b57, &c_b57, &u[(u_dim1 << + 1) + 1], ldu); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__3 = *lwork - iwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U, copying result to WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__3 = *lwork - iwork + 1; + sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + slacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr); + +/* Generate right vectors bidiagonalizing L in WORK(IR) */ +/* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */ + + i__3 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing L in U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ + + i__3 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__3, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U, and computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], + &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) */ + + i__3 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = f2cmin(i__4,chunk); + sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, & + work[iu], &ldwrku); + slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda); +/* L40: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 << + 1) + 1], ldu); + +/* Generate Q in A */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in A */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[ + itaup], &a[a_offset], lda, &work[iwork], &i__2, & + ierr); + +/* Generate left vectors bidiagonalizing L in U */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntvs) { + + if (wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IR), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing L in */ +/* WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy result to VT */ + + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out below it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M+4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M+BDSPAC) */ + + sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt); + +/* Copy left singular vectors of L to A */ +/* (Workspace: need M*M) */ + + slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors of L in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, compute left */ +/* singular vectors of A in A and compute right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info); + + } + + } else if (wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is LDA by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + +/* Generate Q in A */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need M*M+4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M+BDSPAC) */ + + sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 + << 1) + 1], ldu); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } else if (wntva) { + + if (wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy L to WORK(IR), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr); + +/* Generate Q in VT */ +/* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need M*M+4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M+BDSPAC) */ + + sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M+N, prefer M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info); + + } + + } else if (wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M+4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M+BDSPAC) */ + + sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Copy left singular vectors of A from WORK(IR) to A */ + + slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M+N, prefer M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info); + + } + + } else if (wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3); + if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by M */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is M by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M+BDSPAC) */ + + sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda); + +/* Copy right singular vectors of A from A to VT */ + + slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt); + +/* Generate Q in VT */ +/* (Workspace: need M+N, prefer M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], + ldu); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 + << 1) + 1], ldu); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M+N, prefer 3*M+N*NB) */ + + i__2 = *lwork - iwork + 1; + sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info); + + } + + } + + } + + } else { + +/* N .LT. MNTHR */ + +/* Path 10t(N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__2 = *lwork - iwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ + + slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */ + + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + if (wntva) { + nrvt = *n; + } + if (wntvs) { + nrvt = *m; + } + i__2 = *lwork - iwork + 1; + sorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__2 = *lwork - iwork + 1; + sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr); + } + iwork = ie + *m; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info); + } + + } + + } + +/* If SBDSQR failed to converge, copy unconverged superdiagonals */ +/* to WORK( 2:MINMN ) */ + + if (*info != 0) { + if (ie > 2) { + i__2 = minmn - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L50: */ + } + } + if (ie < 2) { + for (i__ = minmn - 1; i__ >= 1; --i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L60: */ + } + } + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm > bignum) { + i__2 = minmn - 1; + slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (*info != 0 && anrm < smlnum) { + i__2 = minmn - 1; + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (real) maxwrk; + + return 0; + +/* End of SGESVD */ + +} /* sgesvd_ */ + diff --git a/lapack-netlib/SRC/sgesvdq.c b/lapack-netlib/SRC/sgesvdq.c new file mode 100644 index 000000000..e2de2d961 --- /dev/null +++ b/lapack-netlib/SRC/sgesvdq.c @@ -0,0 +1,2124 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method + for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESVDQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESVDQ( JOBA, JOBP, JOBR, JOBU, JOBV, M, N, A, LDA, */ +/* S, U, LDU, V, LDV, NUMRANK, IWORK, LIWORK, */ +/* WORK, LWORK, RWORK, LRWORK, INFO ) */ + +/* IMPLICIT NONE */ +/* CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV */ +/* INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK, */ +/* INFO */ +/* REAL A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * ) */ +/* REAL S( * ), RWORK( * ) */ +/* INTEGER IWORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESVDQ computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, where M >= N. The SVD of A is written as */ +/* > [++] [xx] [x0] [xx] */ +/* > A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] */ +/* > [++] [xx] */ +/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ +/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */ +/* > of SIGMA are the singular values of A. The columns of U and V are the */ +/* > left and the right singular vectors of A, respectively. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the level of accuracy in the computed SVD */ +/* > = 'A' The requested accuracy corresponds to having the backward */ +/* > error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F, */ +/* > where EPS = SLAMCH('Epsilon'). This authorises CGESVDQ to */ +/* > truncate the computed triangular factor in a rank revealing */ +/* > QR factorization whenever the truncated part is below the */ +/* > threshold of the order of EPS * ||A||_F. This is aggressive */ +/* > truncation level. */ +/* > = 'M' Similarly as with 'A', but the truncation is more gentle: it */ +/* > is allowed only when there is a drop on the diagonal of the */ +/* > triangular factor in the QR factorization. This is medium */ +/* > truncation level. */ +/* > = 'H' High accuracy requested. No numerical rank determination based */ +/* > on the rank revealing QR factorization is attempted. */ +/* > = 'E' Same as 'H', and in addition the condition number of column */ +/* > scaled A is estimated and returned in RWORK(1). */ +/* > N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBP */ +/* > \verbatim */ +/* > JOBP is CHARACTER*1 */ +/* > = 'P' The rows of A are ordered in decreasing order with respect to */ +/* > ||A(i,:)||_\infty. This enhances numerical accuracy at the cost */ +/* > of extra data movement. Recommended for numerical robustness. */ +/* > = 'N' No row pivoting. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBR */ +/* > \verbatim */ +/* > JOBR is CHARACTER*1 */ +/* > = 'T' After the initial pivoted QR factorization, SGESVD is applied to */ +/* > the transposed R**T of the computed triangular factor R. This involves */ +/* > some extra data movement (matrix transpositions). Useful for */ +/* > experiments, research and development. */ +/* > = 'N' The triangular factor R is given as input to SGESVD. This may be */ +/* > preferred as it involves less data movement. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'A' All M left singular vectors are computed and returned in the */ +/* > matrix U. See the description of U. */ +/* > = 'S' or 'U' N = f2cmin(M,N) left singular vectors are computed and returned */ +/* > in the matrix U. See the description of U. */ +/* > = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular */ +/* > vectors are computed and returned in the matrix U. */ +/* > = 'F' The N left singular vectors are returned in factored form as the */ +/* > product of the Q factor from the initial QR factorization and the */ +/* > N left singular vectors of (R**T , 0)**T. If row pivoting is used, */ +/* > then the necessary information on the row pivoting is stored in */ +/* > IWORK(N+1:N+M-1). */ +/* > = 'N' The left singular vectors are not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'A', 'V' All N right singular vectors are computed and returned in */ +/* > the matrix V. */ +/* > = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular */ +/* > vectors are computed and returned in the matrix V. This option is */ +/* > allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal. */ +/* > = 'N' The right singular vectors are not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array of dimensions LDA x N */ +/* > On entry, the input matrix A. */ +/* > On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains */ +/* > the Householder vectors as stored by SGEQP3. If JOBU = 'F', these Householder */ +/* > vectors together with WORK(1:N) can be used to restore the Q factors from */ +/* > the initial pivoted QR factorization of A. See the description of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER. */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is REAL array of dimension N. */ +/* > The singular values of A, ordered so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is REAL array, dimension */ +/* > LDU x M if JOBU = 'A'; see the description of LDU. In this case, */ +/* > on exit, U contains the M left singular vectors. */ +/* > LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this */ +/* > case, U contains the leading N or the leading NUMRANK left singular vectors. */ +/* > LDU x N if JOBU = 'F' ; see the description of LDU. In this case U */ +/* > contains N x N orthogonal matrix that can be used to form the left */ +/* > singular vectors. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER. */ +/* > The leading dimension of the array U. */ +/* > If JOBU = 'A', 'S', 'U', 'R', LDU >= f2cmax(1,M). */ +/* > If JOBU = 'F', LDU >= f2cmax(1,N). */ +/* > Otherwise, LDU >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension */ +/* > LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' . */ +/* > If JOBV = 'A', or 'V', V contains the N-by-N orthogonal matrix V**T; */ +/* > If JOBV = 'R', V contains the first NUMRANK rows of V**T (the right */ +/* > singular vectors, stored rowwise, of the NUMRANK largest singular values). */ +/* > If JOBV = 'N' and JOBA = 'E', V is used as a workspace. */ +/* > If JOBV = 'N', and JOBA.NE.'E', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If JOBV = 'A', 'V', 'R', or JOBA = 'E', LDV >= f2cmax(1,N). */ +/* > Otherwise, LDV >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NUMRANK */ +/* > \verbatim */ +/* > NUMRANK is INTEGER */ +/* > NUMRANK is the numerical rank first determined after the rank */ +/* > revealing QR factorization, following the strategy specified by the */ +/* > value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK */ +/* > leading singular values and vectors are then requested in the call */ +/* > of SGESVD. The final value of NUMRANK might be further reduced if */ +/* > some singular values are computed as zeros. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (f2cmax(1, LIWORK)). */ +/* > On exit, IWORK(1:N) contains column pivoting permutation of the */ +/* > rank revealing QR factorization. */ +/* > If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence */ +/* > of row swaps used in row pivoting. These can be used to restore the */ +/* > left singular vectors in the case JOBU = 'F'. */ +/* > */ +/* > If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > LIWORK(1) returns the minimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > LIWORK >= N + M - 1, if JOBP = 'P' and JOBA .NE. 'E'; */ +/* > LIWORK >= N if JOBP = 'N' and JOBA .NE. 'E'; */ +/* > LIWORK >= N + M - 1 + N, if JOBP = 'P' and JOBA = 'E'; */ +/* > LIWORK >= N + N if JOBP = 'N' and JOBA = 'E'. */ + +/* > If LIWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the WORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (f2cmax(2, LWORK)), used as a workspace. */ +/* > On exit, if, on entry, LWORK.NE.-1, WORK(1:N) contains parameters */ +/* > needed to recover the Q factor from the QR factorization computed by */ +/* > SGEQP3. */ +/* > */ +/* > If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > WORK(1) returns the optimal LWORK, and */ +/* > WORK(2) returns the minimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. It is determined as follows: */ +/* > Let LWQP3 = 3*N+1, LWCON = 3*N, and let */ +/* > LWORQ = { MAX( N, 1 ), if JOBU = 'R', 'S', or 'U' */ +/* > { MAX( M, 1 ), if JOBU = 'A' */ +/* > LWSVD = MAX( 5*N, 1 ) */ +/* > LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 5*(N/2), 1 ), LWORLQ = MAX( N, 1 ), */ +/* > LWQRF = MAX( N/2, 1 ), LWORQ2 = MAX( N, 1 ) */ +/* > Then the minimal value of LWORK is: */ +/* > = MAX( N + LWQP3, LWSVD ) if only the singular values are needed; */ +/* > = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed, */ +/* > and a scaled condition estimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD, LWORQ ) if the singular values and the left */ +/* > singular vectors are requested; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the singular values and the left */ +/* > singular vectors are requested, and also */ +/* > a scaled condition estimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD ) if the singular values and the right */ +/* > singular vectors are requested; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right */ +/* > singular vectors are requested, and also */ +/* > a scaled condition etimate requested; */ +/* > */ +/* > = N + MAX( LWQP3, LWSVD, LWORQ ) if the full SVD is requested with JOBV = 'R'; */ +/* > independent of JOBR; */ +/* > = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the full SVD is requested, */ +/* > JOBV = 'R' and, also a scaled condition */ +/* > estimate requested; independent of JOBR; */ +/* > = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ) ) if the */ +/* > full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='N' */ +/* > = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ ) ) */ +/* > if the full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='N', and also a scaled condition number estimate */ +/* > requested. */ +/* > = MAX( N + MAX( LWQP3, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) if the */ +/* > full SVD is requested with JOBV = 'A', 'V', and JOBR ='T' */ +/* > = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ), */ +/* > N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) */ +/* > if the full SVD is requested with JOBV = 'A' or 'V', and */ +/* > JOBR ='T', and also a scaled condition number estimate */ +/* > requested. */ +/* > Finally, LWORK must be at least two: LWORK = MAX( 2, LWORK ). */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the WORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is REAL array, dimension (f2cmax(1, LRWORK)). */ +/* > On exit, */ +/* > 1. If JOBA = 'E', RWORK(1) contains an estimate of the condition */ +/* > number of column scaled A. If A = C * D where D is diagonal and C */ +/* > has unit columns in the Euclidean norm, then, assuming full column rank, */ +/* > N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1). */ +/* > Otherwise, RWORK(1) = -1. */ +/* > 2. RWORK(2) contains the number of singular values computed as */ +/* > exact zeros in SGESVD applied to the upper triangular or trapeziodal */ +/* > R (from the initial QR factorization). In case of early exit (no call to */ +/* > SGESVD, such as in the case of zero matrix) RWORK(2) = -1. */ +/* > */ +/* > If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0, */ +/* > RWORK(1) returns the minimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER. */ +/* > The dimension of the array RWORK. */ +/* > If JOBP ='P', then LRWORK >= MAX(2, M). */ +/* > Otherwise, LRWORK >= 2 */ + +/* > If LRWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates and returns the optimal and minimal sizes */ +/* > for the WORK, IWORK, and RWORK arrays, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if SBDSQR did not converge, INFO specifies how many superdiagonals */ +/* > of an intermediate bidiagonal form B (computed in SGESVD) did not */ +/* > converge to zero. */ +/* > \endverbatim */ + +/* > \par Further Details: */ +/* ======================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > 1. The data movement (matrix transpose) is coded using simple nested */ +/* > DO-loops because BLAS and LAPACK do not provide corresponding subroutines. */ +/* > Those DO-loops are easily identified in this source code - by the CONTINUE */ +/* > statements labeled with 11**. In an optimized version of this code, the */ +/* > nested DO loops should be replaced with calls to an optimized subroutine. */ +/* > 2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause */ +/* > column norm overflow. This is the minial precaution and it is left to the */ +/* > SVD routine (CGESVD) to do its own preemptive scaling if potential over- */ +/* > or underflows are detected. To avoid repeated scanning of the array A, */ +/* > an optimal implementation would do all necessary scaling before calling */ +/* > CGESVD and the scaling in CGESVD can be switched off. */ +/* > 3. Other comments related to code optimization are given in comments in the */ +/* > code, enlosed in [[double brackets]]. */ +/* > \endverbatim */ + +/* > \par Bugs, examples and comments */ +/* =========================== */ + +/* > \verbatim */ +/* > Please report all bugs and send interesting examples and/or comments to */ +/* > drmac@math.hr. Thank you. */ +/* > \endverbatim */ + +/* > \par References */ +/* =============== */ + +/* > \verbatim */ +/* > [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for */ +/* > Computing the SVD with High Accuracy. ACM Trans. Math. Softw. */ +/* > 44(1): 11:1-11:30 (2017) */ +/* > */ +/* > SIGMA library, xGESVDQ section updated February 2016. */ +/* > Developed and coded by Zlatko Drmac, Department of Mathematics */ +/* > University of Zagreb, Croatia, drmac@math.hr */ +/* > \endverbatim */ + + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > Developed and coded by Zlatko Drmac, Department of Mathematics */ +/* > University of Zagreb, Croatia, drmac@math.hr */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2018 */ + +/* > \ingroup realGEsing */ + +/* ===================================================================== */ +/* Subroutine */ int sgesvdq_(char *joba, char *jobp, char *jobr, char *jobu, + char *jobv, integer *m, integer *n, real *a, integer *lda, real *s, + real *u, integer *ldu, real *v, integer *ldv, integer *numrank, + integer *iwork, integer *liwork, real *work, integer *lwork, real * + rwork, integer *lrwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2; + real r__1, r__2, r__3; + + /* Local variables */ + integer lwrk_sormlq__, lwrk_sormqr__, ierr, lwrk_sgesvd2__; + real rtmp; + integer lwrk_sormqr2__, optratio; + logical lsvc0; + extern real snrm2_(integer *, real *, integer *); + logical accla; + integer lwqp3; + logical acclh, acclm; + integer p, q; + logical conda; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + integer iwoff; + logical lsvec; + real sfmin, epsln; + integer lwcon; + logical rsvec; + integer lwlqf, lwqrf, n1, lwsvd; + logical dntwu, dntwv, wntua; + integer lworq; + logical wntuf, wntva, wntur, wntus, wntvr; + extern /* Subroutine */ int sgeqp3_(integer *, integer *, real *, integer + *, integer *, real *, real *, integer *, integer *); + integer lwsvd2, lworq2, nr; + real sconda; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgelqf_( + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *), slascl_(char *, integer *, integer *, real *, real * + , integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), sgesvd_(char *, char *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, real *, integer *, integer *) + , slacpy_(char *, integer *, integer *, real *, integer *, real *, + integer *), slaset_(char *, integer *, integer *, real *, + real *, real *, integer *), slapmt_(logical *, integer *, + integer *, real *, integer *, integer *), spocon_(char *, + integer *, real *, integer *, real *, real *, real *, integer *, + integer *); + integer minwrk; + logical rtrans; + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + real rdummy[1]; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + logical lquery; + integer lwunlq; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + integer optwrk; + logical rowprm; + real big; + integer minwrk2; + logical ascaled; + integer optwrk2, lwrk_sgeqp3__, iminwrk, rminwrk, lwrk_sgelqf__, + lwrk_sgeqrf__, lwrk_sgesvd__; + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --iwork; + --work; + --rwork; + + /* Function Body */ + wntus = lsame_(jobu, "S") || lsame_(jobu, "U"); + wntur = lsame_(jobu, "R"); + wntua = lsame_(jobu, "A"); + wntuf = lsame_(jobu, "F"); + lsvc0 = wntus || wntur || wntua; + lsvec = lsvc0 || wntuf; + dntwu = lsame_(jobu, "N"); + + wntvr = lsame_(jobv, "R"); + wntva = lsame_(jobv, "A") || lsame_(jobv, "V"); + rsvec = wntvr || wntva; + dntwv = lsame_(jobv, "N"); + + accla = lsame_(joba, "A"); + acclm = lsame_(joba, "M"); + conda = lsame_(joba, "E"); + acclh = lsame_(joba, "H") || conda; + + rowprm = lsame_(jobp, "P"); + rtrans = lsame_(jobr, "T"); + + if (rowprm) { + if (conda) { +/* Computing MAX */ + i__1 = 1, i__2 = *n + *m - 1 + *n; + iminwrk = f2cmax(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n + *m - 1; + iminwrk = f2cmax(i__1,i__2); + } + rminwrk = f2cmax(2,*m); + } else { + if (conda) { +/* Computing MAX */ + i__1 = 1, i__2 = *n + *n; + iminwrk = f2cmax(i__1,i__2); + } else { + iminwrk = f2cmax(1,*n); + } + rminwrk = 2; + } + lquery = *liwork == -1 || *lwork == -1 || *lrwork == -1; + *info = 0; + if (! (accla || acclm || acclh)) { + *info = -1; + } else if (! (rowprm || lsame_(jobp, "N"))) { + *info = -2; + } else if (! (rtrans || lsame_(jobr, "N"))) { + *info = -3; + } else if (! (lsvec || dntwu)) { + *info = -4; + } else if (wntur && wntva) { + *info = -5; + } else if (! (rsvec || dntwv)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || *n > *m) { + *info = -7; + } else if (*lda < f2cmax(1,*m)) { + *info = -9; + } else if (*ldu < 1 || lsvc0 && *ldu < *m || wntuf && *ldu < *n) { + *info = -12; + } else if (*ldv < 1 || rsvec && *ldv < *n || conda && *ldv < *n) { + *info = -14; + } else if (*liwork < iminwrk && ! lquery) { + *info = -17; + } + + + if (*info == 0) { +/* [[The expressions for computing the minimal and the optimal */ +/* values of LWORK are written with a lot of redundancy and */ +/* can be simplified. However, this detailed form is easier for */ +/* maintenance and modifications of the code.]] */ + + lwqp3 = *n * 3 + 1; + if (wntus || wntur) { + lworq = f2cmax(*n,1); + } else if (wntua) { + lworq = f2cmax(*m,1); + } + lwcon = *n * 3; +/* Computing MAX */ + i__1 = *n * 5; + lwsvd = f2cmax(i__1,1); + if (lquery) { + sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], rdummy, rdummy, &c_n1, + &ierr); + lwrk_sgeqp3__ = (integer) rdummy[0]; + if (wntus || wntur) { + sormqr_("L", "N", m, n, n, &a[a_offset], lda, rdummy, &u[ + u_offset], ldu, rdummy, &c_n1, &ierr); + lwrk_sormqr__ = (integer) rdummy[0]; + } else if (wntua) { + sormqr_("L", "N", m, m, n, &a[a_offset], lda, rdummy, &u[ + u_offset], ldu, rdummy, &c_n1, &ierr); + lwrk_sormqr__ = (integer) rdummy[0]; + } else { + lwrk_sormqr__ = 0; + } + } + minwrk = 2; + optwrk = 2; + if (! (lsvec || rsvec)) { +/* only the singular values are requested */ + if (conda) { +/* Computing MAX */ + i__1 = *n + lwqp3, i__1 = f2cmax(i__1,lwcon); + minwrk = f2cmax(i__1,lwsvd); + } else { +/* Computing MAX */ + i__1 = *n + lwqp3; + minwrk = f2cmax(i__1,lwsvd); + } + if (lquery) { + sgesvd_("N", "N", n, n, &a[a_offset], lda, &s[1], &u[u_offset] + , ldu, &v[v_offset], ldv, rdummy, &c_n1, &ierr); + lwrk_sgesvd__ = (integer) rdummy[0]; + if (conda) { +/* Computing MAX */ + i__1 = *n + lwrk_sgeqp3__, i__2 = *n + lwcon, i__1 = f2cmax( + i__1,i__2); + optwrk = f2cmax(i__1,lwrk_sgesvd__); + } else { +/* Computing MAX */ + i__1 = *n + lwrk_sgeqp3__; + optwrk = f2cmax(i__1,lwrk_sgesvd__); + } + } + } else if (lsvec && ! rsvec) { +/* singular values and the left singular vectors are requested */ + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwcon), i__1 = f2cmax(i__1,lwsvd); + minwrk = *n + f2cmax(i__1,lworq); + } else { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = *n + f2cmax(i__1,lworq); + } + if (lquery) { + if (rtrans) { + sgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } else { + sgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } + lwrk_sgesvd__ = (integer) rdummy[0]; + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwrk_sgeqp3__,lwcon), i__1 = f2cmax(i__1, + lwrk_sgesvd__); + optwrk = *n + f2cmax(i__1,lwrk_sormqr__); + } else { +/* Computing MAX */ + i__1 = f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__); + optwrk = *n + f2cmax(i__1,lwrk_sormqr__); + } + } + } else if (rsvec && ! lsvec) { +/* singular values and the right singular vectors are requested */ + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwcon); + minwrk = *n + f2cmax(i__1,lwsvd); + } else { + minwrk = *n + f2cmax(lwqp3,lwsvd); + } + if (lquery) { + if (rtrans) { + sgesvd_("O", "N", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } else { + sgesvd_("N", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + } + lwrk_sgesvd__ = (integer) rdummy[0]; + if (conda) { +/* Computing MAX */ + i__1 = f2cmax(lwrk_sgeqp3__,lwcon); + optwrk = *n + f2cmax(i__1,lwrk_sgesvd__); + } else { + optwrk = *n + f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__); + } + } + } else { +/* full SVD is requested */ + if (rtrans) { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = f2cmax(i__1,lworq); + if (conda) { + minwrk = f2cmax(minwrk,lwcon); + } + minwrk += *n; + if (wntva) { +/* Computing MAX */ + i__1 = *n / 2; + lwqrf = f2cmax(i__1,1); +/* Computing MAX */ + i__1 = *n / 2 * 5; + lwsvd2 = f2cmax(i__1,1); + lworq2 = f2cmax(*n,1); +/* Computing MAX */ + i__1 = lwqp3, i__2 = *n / 2 + lwqrf, i__1 = f2cmax(i__1,i__2) + , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), + i__2 = *n / 2 + lworq2, i__1 = f2cmax(i__1,i__2); + minwrk2 = f2cmax(i__1,lworq); + if (conda) { + minwrk2 = f2cmax(minwrk2,lwcon); + } + minwrk2 = *n + minwrk2; + minwrk = f2cmax(minwrk,minwrk2); + } + } else { +/* Computing MAX */ + i__1 = f2cmax(lwqp3,lwsvd); + minwrk = f2cmax(i__1,lworq); + if (conda) { + minwrk = f2cmax(minwrk,lwcon); + } + minwrk += *n; + if (wntva) { +/* Computing MAX */ + i__1 = *n / 2; + lwlqf = f2cmax(i__1,1); +/* Computing MAX */ + i__1 = *n / 2 * 5; + lwsvd2 = f2cmax(i__1,1); + lwunlq = f2cmax(*n,1); +/* Computing MAX */ + i__1 = lwqp3, i__2 = *n / 2 + lwlqf, i__1 = f2cmax(i__1,i__2) + , i__2 = *n / 2 + lwsvd2, i__1 = f2cmax(i__1,i__2), + i__2 = *n / 2 + lwunlq, i__1 = f2cmax(i__1,i__2); + minwrk2 = f2cmax(i__1,lworq); + if (conda) { + minwrk2 = f2cmax(minwrk2,lwcon); + } + minwrk2 = *n + minwrk2; + minwrk = f2cmax(minwrk,minwrk2); + } + } + if (lquery) { + if (rtrans) { + sgesvd_("O", "A", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + lwrk_sgesvd__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__); + optwrk = f2cmax(i__1,lwrk_sormqr__); + if (conda) { + optwrk = f2cmax(optwrk,lwcon); + } + optwrk = *n + optwrk; + if (wntva) { + i__1 = *n / 2; + sgeqrf_(n, &i__1, &u[u_offset], ldu, rdummy, rdummy, & + c_n1, &ierr); + lwrk_sgeqrf__ = (integer) rdummy[0]; + i__1 = *n / 2; + i__2 = *n / 2; + sgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[ + 1], &u[u_offset], ldu, &v[v_offset], ldv, + rdummy, &c_n1, &ierr); + lwrk_sgesvd2__ = (integer) rdummy[0]; + i__1 = *n / 2; + sormqr_("R", "C", n, n, &i__1, &u[u_offset], ldu, + rdummy, &v[v_offset], ldv, rdummy, &c_n1, & + ierr); + lwrk_sormqr2__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = lwrk_sgeqp3__, i__2 = *n / 2 + lwrk_sgeqrf__, + i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + + lwrk_sgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = + *n / 2 + lwrk_sormqr2__; + optwrk2 = f2cmax(i__1,i__2); + if (conda) { + optwrk2 = f2cmax(optwrk2,lwcon); + } + optwrk2 = *n + optwrk2; + optwrk = f2cmax(optwrk,optwrk2); + } + } else { + sgesvd_("S", "O", n, n, &a[a_offset], lda, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, rdummy, &c_n1, + &ierr); + lwrk_sgesvd__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = f2cmax(lwrk_sgeqp3__,lwrk_sgesvd__); + optwrk = f2cmax(i__1,lwrk_sormqr__); + if (conda) { + optwrk = f2cmax(optwrk,lwcon); + } + optwrk = *n + optwrk; + if (wntva) { + i__1 = *n / 2; + sgelqf_(&i__1, n, &u[u_offset], ldu, rdummy, rdummy, & + c_n1, &ierr); + lwrk_sgelqf__ = (integer) rdummy[0]; + i__1 = *n / 2; + i__2 = *n / 2; + sgesvd_("S", "O", &i__1, &i__2, &v[v_offset], ldv, &s[ + 1], &u[u_offset], ldu, &v[v_offset], ldv, + rdummy, &c_n1, &ierr); + lwrk_sgesvd2__ = (integer) rdummy[0]; + i__1 = *n / 2; + sormlq_("R", "N", n, n, &i__1, &u[u_offset], ldu, + rdummy, &v[v_offset], ldv, rdummy, &c_n1, & + ierr); + lwrk_sormlq__ = (integer) rdummy[0]; +/* Computing MAX */ + i__1 = lwrk_sgeqp3__, i__2 = *n / 2 + lwrk_sgelqf__, + i__1 = f2cmax(i__1,i__2), i__2 = *n / 2 + + lwrk_sgesvd2__, i__1 = f2cmax(i__1,i__2), i__2 = + *n / 2 + lwrk_sormlq__; + optwrk2 = f2cmax(i__1,i__2); + if (conda) { + optwrk2 = f2cmax(optwrk2,lwcon); + } + optwrk2 = *n + optwrk2; + optwrk = f2cmax(optwrk,optwrk2); + } + } + } + } + + minwrk = f2cmax(2,minwrk); + optwrk = f2cmax(2,optwrk); + if (*lwork < minwrk && ! lquery) { + *info = -19; + } + + } + + if (*info == 0 && *lrwork < rminwrk && ! lquery) { + *info = -21; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESVDQ", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + +/* Return optimal workspace */ + + iwork[1] = iminwrk; + work[1] = (real) optwrk; + work[2] = (real) minwrk; + rwork[1] = (real) rminwrk; + return 0; + } + +/* Quick return if the matrix is void. */ + + if (*m == 0 || *n == 0) { + return 0; + } + + big = slamch_("O"); + ascaled = FALSE_; + iwoff = 1; + if (rowprm) { + iwoff = *m; +/* ell-infinity norm - this enhances numerical robustness in */ +/* the case of differently scaled rows. */ + i__1 = *m; + for (p = 1; p <= i__1; ++p) { +/* RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) ) */ +/* [[SLANGE will return NaN if an entry of the p-th row is Nan]] */ + rwork[p] = slange_("M", &c__1, n, &a[p + a_dim1], lda, rdummy); + if (rwork[p] != rwork[p] || rwork[p] * 0.f != 0.f) { + *info = -8; + i__2 = -(*info); + xerbla_("SGESVDQ", &i__2, (ftnlen)7); + return 0; + } +/* L1904: */ + } + i__1 = *m - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *m - p + 1; + q = isamax_(&i__2, &rwork[p], &c__1) + p - 1; + iwork[*n + p] = q; + if (p != q) { + rtmp = rwork[p]; + rwork[p] = rwork[q]; + rwork[q] = rtmp; + } +/* L1952: */ + } + + if (rwork[1] == 0.f) { +/* Quick return: A is the M x N zero matrix. */ + *numrank = 0; + slaset_("G", n, &c__1, &c_b72, &c_b72, &s[1], n); + if (wntus) { + slaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu); + } + if (wntua) { + slaset_("G", m, m, &c_b72, &c_b76, &u[u_offset], ldu); + } + if (wntva) { + slaset_("G", n, n, &c_b72, &c_b76, &v[v_offset], ldv); + } + if (wntuf) { + slaset_("G", n, &c__1, &c_b72, &c_b72, &work[1], n) + ; + slaset_("G", m, n, &c_b72, &c_b76, &u[u_offset], ldu); + } + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = p; +/* L5001: */ + } + if (rowprm) { + i__1 = *n + *m - 1; + for (p = *n + 1; p <= i__1; ++p) { + iwork[p] = p - *n; +/* L5002: */ + } + } + if (conda) { + rwork[1] = -1.f; + } + rwork[2] = -1.f; + return 0; + } + + if (rwork[1] > big / sqrt((real) (*m))) { +/* matrix by 1/sqrt(M) if too large entry detected */ + r__1 = sqrt((real) (*m)); + slascl_("G", &c__0, &c__0, &r__1, &c_b76, m, n, &a[a_offset], lda, + &ierr); + ascaled = TRUE_; + } + i__1 = *m - 1; + slaswp_(n, &a[a_offset], lda, &c__1, &i__1, &iwork[*n + 1], &c__1); + } + +/* norms overflows during the QR factorization. The SVD procedure should */ +/* have its own scaling to save the singular values from overflows and */ +/* underflows. That depends on the SVD procedure. */ + + if (! rowprm) { + rtmp = slange_("M", m, n, &a[a_offset], lda, rdummy); + if (rtmp != rtmp || rtmp * 0.f != 0.f) { + *info = -8; + i__1 = -(*info); + xerbla_("SGESVDQ", &i__1, (ftnlen)7); + return 0; + } + if (rtmp > big / sqrt((real) (*m))) { +/* matrix by 1/sqrt(M) if too large entry detected */ + r__1 = sqrt((real) (*m)); + slascl_("G", &c__0, &c__0, &r__1, &c_b76, m, n, &a[a_offset], lda, + &ierr); + ascaled = TRUE_; + } + } + + +/* A * P = Q * [ R ] */ +/* [ 0 ] */ + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + iwork[p] = 0; +/* L1963: */ + } + i__1 = *lwork - *n; + sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &work[1], &work[*n + 1], & + i__1, &ierr); + +/* If the user requested accuracy level allows truncation in the */ +/* computed upper triangular factor, the matrix R is examined and, */ +/* if possible, replaced with its leading upper trapezoidal part. */ + + epsln = slamch_("E"); + sfmin = slamch_("S"); +/* SMALL = SFMIN / EPSLN */ + nr = *n; + + if (accla) { + +/* Standard absolute error bound suffices. All sigma_i with */ +/* sigma_i < N*EPS*||A||_F are flushed to zero. This is an */ +/* aggressive enforcement of lower numerical rank by introducing a */ +/* backward error of the order of N*EPS*||A||_F. */ + nr = 1; + rtmp = sqrt((real) (*n)) * epsln; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((r__2 = a[p + p * a_dim1], abs(r__2)) < rtmp * (r__1 = a[ + a_dim1 + 1], abs(r__1))) { + goto L3002; + } + ++nr; +/* L3001: */ + } +L3002: + + ; + } else if (acclm) { +/* Sudden drop on the diagonal of R is used as the criterion for being */ +/* close-to-rank-deficient. The threshold is set to EPSLN=SLAMCH('E'). */ +/* [[This can be made more flexible by replacing this hard-coded value */ +/* with a user specified threshold.]] Also, the values that underflow */ +/* will be truncated. */ + nr = 1; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((r__2 = a[p + p * a_dim1], abs(r__2)) < epsln * (r__1 = a[p - + 1 + (p - 1) * a_dim1], abs(r__1)) || (r__3 = a[p + p * + a_dim1], abs(r__3)) < sfmin) { + goto L3402; + } + ++nr; +/* L3401: */ + } +L3402: + + ; + } else { +/* obvious case of zero pivots. */ +/* R(i,i)=0 => R(i:N,i:N)=0. */ + nr = 1; + i__1 = *n; + for (p = 2; p <= i__1; ++p) { + if ((r__1 = a[p + p * a_dim1], abs(r__1)) == 0.f) { + goto L3502; + } + ++nr; +/* L3501: */ + } +L3502: + + if (conda) { +/* Estimate the scaled condition number of A. Use the fact that it is */ +/* the same as the scaled condition number of R. */ + slacpy_("U", n, n, &a[a_offset], lda, &v[v_offset], ldv); +/* Only the leading NR x NR submatrix of the triangular factor */ +/* is considered. Only if NR=N will this give a reliable error */ +/* bound. However, even for NR < N, this can be used on an */ +/* expert level and obtain useful information in the sense of */ +/* perturbation theory. */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + rtmp = snrm2_(&p, &v[p * v_dim1 + 1], &c__1); + r__1 = 1.f / rtmp; + sscal_(&p, &r__1, &v[p * v_dim1 + 1], &c__1); +/* L3053: */ + } + if (! (lsvec || rsvec)) { + spocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[1], + &iwork[*n + iwoff], &ierr); + } else { + spocon_("U", &nr, &v[v_offset], ldv, &c_b76, &rtmp, &work[*n + + 1], &iwork[*n + iwoff], &ierr); + } + sconda = 1.f / sqrt(rtmp); +/* For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1), */ +/* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA */ +/* See the reference [1] for more details. */ + } + + } + + if (wntur) { + n1 = nr; + } else if (wntus || wntuf) { + n1 = *n; + } else if (wntua) { + n1 = *m; + } + + if (! (rsvec || lsvec)) { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + +/* the upper triangle of [A] to zero. */ + i__1 = f2cmin(*n,nr); + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + a[q + p * a_dim1] = a[p + q * a_dim1]; + if (q <= nr) { + a[p + q * a_dim1] = 0.f; + } +/* L1147: */ + } +/* L1146: */ + } + + sgesvd_("N", "N", n, &nr, &a[a_offset], lda, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &work[1], lwork, info); + + } else { + + + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &a[a_dim1 + 2], + lda); + } + sgesvd_("N", "N", &nr, n, &a[a_offset], lda, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &work[1], lwork, info); + + } + + } else if (lsvec && ! rsvec) { +/* ....................................................................... */ +/* ......................................................................."""""""" */ + if (rtrans) { +/* vectors of R */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + u[q + p * u_dim1] = a[p + q * a_dim1]; +/* L1193: */ + } +/* L1192: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(u_dim1 << 1) + + 1], ldu); + } +/* vectors overwrite [U](1:NR,1:NR) as transposed. These */ +/* will be pre-multiplied by Q to build the left singular vectors of A. */ + i__1 = *lwork - *n; + sgesvd_("N", "O", n, &nr, &u[u_offset], ldu, &s[1], &u[u_offset], + ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = u[q + p * u_dim1]; + u[q + p * u_dim1] = u[p + q * u_dim1]; + u[p + q * u_dim1] = rtmp; +/* L1120: */ + } +/* L1119: */ + } + + } else { + slacpy_("U", &nr, n, &a[a_offset], lda, &u[u_offset], ldu); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[u_dim1 + 2], + ldu); + } +/* vectors overwrite [U](1:NR,1:NR) */ + i__1 = *lwork - *n; + sgesvd_("O", "N", &nr, n, &u[u_offset], ldu, &s[1], &u[u_offset], + ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info); +/* R. These will be pre-multiplied by Q to build the left singular */ +/* vectors of A. */ + } + +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * u_dim1 + + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + (nr + + 1) * u_dim1], ldu); + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* vectors matrix U. */ + + if (! wntuf) { + i__1 = *lwork - *n; + sormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[ + u_offset], ldu, &work[*n + 1], &i__1, &ierr); + } + if (rowprm && ! wntuf) { + i__1 = *m - 1; + slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], & + c_n1); + } + + } else if (rsvec && ! lsvec) { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + v[q + p * v_dim1] = a[p + q * a_dim1]; +/* L1166: */ + } +/* L1165: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << 1) + + 1], ldv); + } +/* vectors not computed */ + if (wntvr || nr == *n) { + i__1 = *lwork - *n; + sgesvd_("O", "N", n, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &u[u_offset], ldu, &work[*n + 1], & + i__1, info); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1122: */ + } +/* L1121: */ + } + + if (nr < *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = nr + 1; q <= i__2; ++q) { + v[p + q * v_dim1] = v[q + p * v_dim1]; +/* L1104: */ + } +/* L1103: */ + } + } + slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + } else { +/* [!] This is simple implementation that augments [V](1:N,1:NR) */ +/* by padding a zero block. In the case NR << N, a more efficient */ +/* way is to first use the QR factorization. For more details */ +/* how to implement this, see the " FULL SVD " branch. */ + i__1 = *n - nr; + slaset_("G", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * v_dim1 + + 1], ldv); + i__1 = *lwork - *n; + sgesvd_("O", "N", n, n, &v[v_offset], ldv, &s[1], &u[u_offset] + , ldu, &u[u_offset], ldu, &work[*n + 1], &i__1, info); + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1124: */ + } +/* L1123: */ + } + slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); + } + + } else { + slacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2], + ldv); + } +/* vectors stored in U(1:NR,1:NR) */ + if (wntvr || nr == *n) { + i__1 = *lwork - *n; + sgesvd_("N", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], & + i__1, info); + slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + } else { +/* [!] This is simple implementation that augments [V](1:NR,1:N) */ +/* by padding a zero block. In the case NR << N, a more efficient */ +/* way is to first use the LQ factorization. For more details */ +/* how to implement this, see the " FULL SVD " branch. */ + i__1 = *n - nr; + slaset_("G", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1], + ldv); + i__1 = *lwork - *n; + sgesvd_("N", "O", n, n, &v[v_offset], ldv, &s[1], &u[u_offset] + , ldu, &v[v_offset], ldv, &work[*n + 1], &i__1, info); + slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); + } +/* vectors of A. */ + } + + } else { +/* ....................................................................... */ +/* ....................................................................... */ + if (rtrans) { + + + if (wntvr || nr == *n) { +/* vectors of R**T */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + v[q + p * v_dim1] = a[p + q * a_dim1]; +/* L1169: */ + } +/* L1168: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << + 1) + 1], ldv); + } + +/* singular vectors of R**T stored in [U](1:NR,1:NR) as transposed */ + i__1 = *lwork - *n; + sgesvd_("O", "A", n, &nr, &v[v_offset], ldv, &s[1], &v[ + v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], & + i__1, info); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1116: */ + } +/* L1115: */ + } + if (nr < *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = nr + 1; q <= i__2; ++q) { + v[p + q * v_dim1] = v[q + p * v_dim1]; +/* L1102: */ + } +/* L1101: */ + } + } + slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); + + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = nr; + for (q = p + 1; q <= i__2; ++q) { + rtmp = u[q + p * u_dim1]; + u[q + p * u_dim1] = u[p + q * u_dim1]; + u[p + q * u_dim1] = rtmp; +/* L1118: */ + } +/* L1117: */ + } + + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + + (nr + 1) * u_dim1], ldu); + } + } + + } else { +/* vectors of R**T */ +/* [[The optimal ratio N/NR for using QRF instead of padding */ +/* with zeros. Here hard coded to 2; it must be at least */ +/* two due to work space constraints.]] */ +/* OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0) */ +/* OPTRATIO = MAX( OPTRATIO, 2 ) */ + optratio = 2; + if (optratio * nr > *n) { + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + v[q + p * v_dim1] = a[p + q * a_dim1]; +/* L1199: */ + } +/* L1198: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 + << 1) + 1], ldv); + } + + i__1 = *n - nr; + slaset_("A", n, &i__1, &c_b72, &c_b72, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *lwork - *n; + sgesvd_("O", "A", n, n, &v[v_offset], ldv, &s[1], &v[ + v_offset], ldv, &u[u_offset], ldu, &work[*n + 1], + &i__1, info); + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + rtmp = v[q + p * v_dim1]; + v[q + p * v_dim1] = v[p + q * v_dim1]; + v[p + q * v_dim1] = rtmp; +/* L1114: */ + } +/* L1113: */ + } + slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x N1), i.e. (M x N) or (M x M). */ + + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p + 1; q <= i__2; ++q) { + rtmp = u[q + p * u_dim1]; + u[q + p * u_dim1] = u[p + q * u_dim1]; + u[p + q * u_dim1] = rtmp; +/* L1112: */ + } +/* L1111: */ + } + + if (*n < *m && ! wntuf) { + i__1 = *m - *n; + slaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + + u_dim1], ldu); + if (*n < n1) { + i__1 = n1 - *n; + slaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1) + * u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n + + 1 + (*n + 1) * u_dim1], ldu); + } + } + } else { +/* singular vectors of R */ + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = p; q <= i__2; ++q) { + u[q + (nr + p) * u_dim1] = a[p + q * a_dim1]; +/* L1197: */ + } +/* L1196: */ + } + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &u[(nr + 2) + * u_dim1 + 1], ldu); + } + i__1 = *lwork - *n - nr; + sgeqrf_(n, &nr, &u[(nr + 1) * u_dim1 + 1], ldu, &work[*n + + 1], &work[*n + nr + 1], &i__1, &ierr); + i__1 = nr; + for (p = 1; p <= i__1; ++p) { + i__2 = *n; + for (q = 1; q <= i__2; ++q) { + v[q + p * v_dim1] = u[p + (nr + q) * u_dim1]; +/* L1144: */ + } +/* L1143: */ + } + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 << + 1) + 1], ldv); + i__1 = *lwork - *n - nr; + sgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + nr + + 1], &i__1, info); + i__1 = *n - nr; + slaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + slaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + ( + nr + 1) * v_dim1], ldv); + i__1 = *lwork - *n - nr; + sormqr_("R", "C", n, n, &nr, &u[(nr + 1) * u_dim1 + 1], + ldu, &work[*n + 1], &v[v_offset], ldv, &work[*n + + nr + 1], &i__1, &ierr); + slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + + 1) * u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + + 1 + (nr + 1) * u_dim1], ldu); + } + } + } + } + + } else { + + + if (wntvr || nr == *n) { + slacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + 2], + ldv); + } +/* singular vectors of R stored in [U](1:NR,1:NR) */ + i__1 = *lwork - *n; + sgesvd_("S", "O", &nr, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], & + i__1, info); + slapmt_(&c_false, &nr, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + 1) * + u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + 1 + + (nr + 1) * u_dim1], ldu); + } + } + + } else { +/* is then N1 (N or M) */ +/* [[The optimal ratio N/NR for using LQ instead of padding */ +/* with zeros. Here hard coded to 2; it must be at least */ +/* two due to work space constraints.]] */ +/* OPTRATIO = ILAENV(6, 'SGESVD', 'S' // 'O', NR,N,0,0) */ +/* OPTRATIO = MAX( OPTRATIO, 2 ) */ + optratio = 2; + if (optratio * nr > *n) { + slacpy_("U", &nr, n, &a[a_offset], lda, &v[v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &v[v_dim1 + + 2], ldv); + } +/* singular vectors of R stored in [U](1:NR,1:NR) */ + i__1 = *n - nr; + slaset_("A", &i__1, n, &c_b72, &c_b72, &v[nr + 1 + v_dim1] + , ldv); + i__1 = *lwork - *n; + sgesvd_("S", "O", n, n, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + 1], + &i__1, info); + slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* singular vectors of A. The leading N left singular vectors */ +/* are in [U](1:N,1:N) */ +/* (M x N1), i.e. (M x N) or (M x M). */ + if (*n < *m && ! wntuf) { + i__1 = *m - *n; + slaset_("A", &i__1, n, &c_b72, &c_b72, &u[*n + 1 + + u_dim1], ldu); + if (*n < n1) { + i__1 = n1 - *n; + slaset_("A", n, &i__1, &c_b72, &c_b72, &u[(*n + 1) + * u_dim1 + 1], ldu); + i__1 = *m - *n; + i__2 = n1 - *n; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[*n + + 1 + (*n + 1) * u_dim1], ldu); + } + } + } else { + slacpy_("U", &nr, n, &a[a_offset], lda, &u[nr + 1 + + u_dim1], ldu); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("L", &i__1, &i__2, &c_b72, &c_b72, &u[nr + 2 + + u_dim1], ldu); + } + i__1 = *lwork - *n - nr; + sgelqf_(&nr, n, &u[nr + 1 + u_dim1], ldu, &work[*n + 1], & + work[*n + nr + 1], &i__1, &ierr); + slacpy_("L", &nr, &nr, &u[nr + 1 + u_dim1], ldu, &v[ + v_offset], ldv); + if (nr > 1) { + i__1 = nr - 1; + i__2 = nr - 1; + slaset_("U", &i__1, &i__2, &c_b72, &c_b72, &v[(v_dim1 + << 1) + 1], ldv); + } + i__1 = *lwork - *n - nr; + sgesvd_("S", "O", &nr, &nr, &v[v_offset], ldv, &s[1], &u[ + u_offset], ldu, &v[v_offset], ldv, &work[*n + nr + + 1], &i__1, info); + i__1 = *n - nr; + slaset_("A", &i__1, &nr, &c_b72, &c_b72, &v[nr + 1 + + v_dim1], ldv); + i__1 = *n - nr; + slaset_("A", &nr, &i__1, &c_b72, &c_b72, &v[(nr + 1) * + v_dim1 + 1], ldv); + i__1 = *n - nr; + i__2 = *n - nr; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &v[nr + 1 + ( + nr + 1) * v_dim1], ldv); + i__1 = *lwork - *n - nr; + sormlq_("R", "N", n, n, &nr, &u[nr + 1 + u_dim1], ldu, & + work[*n + 1], &v[v_offset], ldv, &work[*n + nr + + 1], &i__1, &ierr); + slapmt_(&c_false, n, n, &v[v_offset], ldv, &iwork[1]); +/* (M x NR) or (M x N) or (M x M). */ + if (nr < *m && ! wntuf) { + i__1 = *m - nr; + slaset_("A", &i__1, &nr, &c_b72, &c_b72, &u[nr + 1 + + u_dim1], ldu); + if (nr < n1) { + i__1 = n1 - nr; + slaset_("A", &nr, &i__1, &c_b72, &c_b72, &u[(nr + + 1) * u_dim1 + 1], ldu); + i__1 = *m - nr; + i__2 = n1 - nr; + slaset_("A", &i__1, &i__2, &c_b72, &c_b76, &u[nr + + 1 + (nr + 1) * u_dim1], ldu); + } + } + } + } + } + +/* The Q matrix from the first QRF is built into the left singular */ +/* vectors matrix U. */ + + if (! wntuf) { + i__1 = *lwork - *n; + sormqr_("L", "N", m, &n1, n, &a[a_offset], lda, &work[1], &u[ + u_offset], ldu, &work[*n + 1], &i__1, &ierr); + } + if (rowprm && ! wntuf) { + i__1 = *m - 1; + slaswp_(&n1, &u[u_offset], ldu, &c__1, &i__1, &iwork[*n + 1], & + c_n1); + } + +/* ... end of the "full SVD" branch */ + } + +/* Check whether some singular values are returned as zeros, e.g. */ +/* due to underflow, and update the numerical rank. */ + p = nr; + for (q = p; q >= 1; --q) { + if (s[q] > 0.f) { + goto L4002; + } + --nr; +/* L4001: */ + } +L4002: + +/* singular values are set to zero. */ + if (nr < *n) { + i__1 = *n - nr; + slaset_("G", &i__1, &c__1, &c_b72, &c_b72, &s[nr + 1], n); + } +/* values. */ + if (ascaled) { + r__1 = sqrt((real) (*m)); + slascl_("G", &c__0, &c__0, &c_b76, &r__1, &nr, &c__1, &s[1], n, &ierr); + } + if (conda) { + rwork[1] = sconda; + } + rwork[2] = (real) (p - nr); +/* exact zeros in SGESVD() applied to the (possibly truncated) */ +/* full row rank triangular (trapezoidal) factor of A. */ + *numrank = nr; + + return 0; + +/* End of SGESVDQ */ + +} /* sgesvdq_ */ + diff --git a/lapack-netlib/SRC/sgesvdx.c b/lapack-netlib/SRC/sgesvdx.c new file mode 100644 index 000000000..59c1c95d5 --- /dev/null +++ b/lapack-netlib/SRC/sgesvdx.c @@ -0,0 +1,1339 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGESVDX computes the singular value decomposition (SVD) for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESVDX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, */ +/* $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, */ +/* $ LWORK, IWORK, INFO ) */ + + +/* CHARACTER JOBU, JOBVT, RANGE */ +/* INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS */ +/* REAL VL, VU */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), S( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESVDX computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors. The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first f2cmin(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > SGESVDX uses an eigenvalue problem for obtaining the SVD, which */ +/* > allows for the computation of a subset of singular values and */ +/* > vectors. See SBDSVDX for details. */ +/* > */ +/* > Note that the routine returns V**T, not V. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'V': the first f2cmin(m,n) columns of U (the left singular */ +/* > vectors) or as specified by RANGE are returned in */ +/* > the array U; */ +/* > = 'N': no columns of U (no left singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVT */ +/* > \verbatim */ +/* > JOBVT is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix */ +/* > V**T: */ +/* > = 'V': the first f2cmin(m,n) rows of V**T (the right singular */ +/* > vectors) or as specified by RANGE are returned in */ +/* > the array VT; */ +/* > = 'N': no rows of V**T (no right singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RANGE */ +/* > \verbatim */ +/* > RANGE is CHARACTER*1 */ +/* > = 'A': all singular values will be found. */ +/* > = 'V': all singular values in the half-open interval (VL,VU] */ +/* > will be found. */ +/* > = 'I': the IL-th through IU-th singular values will be found. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the contents of A are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VL */ +/* > \verbatim */ +/* > VL is REAL */ +/* > If RANGE='V', the lower bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VU */ +/* > \verbatim */ +/* > VU is REAL */ +/* > If RANGE='V', the upper bound of the interval to */ +/* > be searched for singular values. VU > VL. */ +/* > Not referenced if RANGE = 'A' or 'I'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IL */ +/* > \verbatim */ +/* > IL is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > smallest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IU */ +/* > \verbatim */ +/* > IU is INTEGER */ +/* > If RANGE='I', the index of the */ +/* > largest singular value to be returned. */ +/* > 1 <= IL <= IU <= f2cmin(M,N), if f2cmin(M,N) > 0. */ +/* > Not referenced if RANGE = 'A' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NS */ +/* > \verbatim */ +/* > NS is INTEGER */ +/* > The total number of singular values found, */ +/* > 0 <= NS <= f2cmin(M,N). */ +/* > If RANGE = 'A', NS = f2cmin(M,N); if RANGE = 'I', NS = IU-IL+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is REAL array, dimension (f2cmin(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is REAL array, dimension (LDU,UCOL) */ +/* > If JOBU = 'V', U contains columns of U (the left singular */ +/* > vectors, stored columnwise) as specified by RANGE; if */ +/* > JOBU = 'N', U is not referenced. */ +/* > Note: The user must ensure that UCOL >= NS; if RANGE = 'V', */ +/* > the exact value of NS is not known in advance and an upper */ +/* > bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBU = 'V', LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is REAL array, dimension (LDVT,N) */ +/* > If JOBVT = 'V', VT contains the rows of V**T (the right singular */ +/* > vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', */ +/* > VT is not referenced. */ +/* > Note: The user must ensure that LDVT >= NS; if RANGE = 'V', */ +/* > the exact value of NS is not known in advance and an upper */ +/* > bound must be used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; if */ +/* > JOBVT = 'V', LDVT >= NS (see above). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see */ +/* > comments inside the code): */ +/* > - PATH 1 (M much larger than N) */ +/* > - PATH 1t (N much larger than M) */ +/* > LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (12*MIN(M,N)) */ +/* > If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, */ +/* > then IWORK contains the indices of the eigenvectors that failed */ +/* > to converge in SBDSVDX/SSTEVX. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, then i eigenvectors failed to converge */ +/* > in SBDSVDX/SSTEVX. */ +/* > if INFO = N*2 + 1, an internal error occurred in */ +/* > SBDSVDX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEsing */ + +/* ===================================================================== */ +/* Subroutine */ int sgesvdx_(char *jobu, char *jobvt, char *range, integer * + m, integer *n, real *a, integer *lda, real *vl, real *vu, integer *il, + integer *iu, integer *ns, real *s, real *u, integer *ldu, real *vt, + integer *ldvt, real *work, integer *lwork, integer *iwork, integer * + info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3; + char ch__1[2]; + + /* Local variables */ + integer iscl; + logical alls, inds; + integer ilqf; + real anrm; + integer ierr, iqrf, itau; + char jobz[1]; + logical vals; + integer i__, j; + extern logical lsame_(char *, char *); + integer iltgk, itemp, minmn, itaup, itauq, iutgk, itgkz, mnthr; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantu; + integer id, ie; + extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *, integer *); + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *); + real abstol; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slacpy_(char *, integer + *, integer *, real *, integer *, real *, integer *); + char rngtgk[1]; + extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, + real *, real *, integer *), sormbr_(char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + real *, integer *, real *, integer *, integer *); + integer minwrk, maxwrk; + real smlnum; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + logical lquery, wantvt; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + real dum[1], eps; + extern /* Subroutine */ int sbdsvdx_(char *, char *, char *, integer *, + real *, real *, real *, real *, integer *, integer *, integer *, + real *, real *, integer *, real *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1 * 1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *ns = 0; + *info = 0; + abstol = slamch_("S") * 2; + lquery = *lwork == -1; + minmn = f2cmin(*m,*n); + wantu = lsame_(jobu, "V"); + wantvt = lsame_(jobvt, "V"); + if (wantu || wantvt) { + *(unsigned char *)jobz = 'V'; + } else { + *(unsigned char *)jobz = 'N'; + } + alls = lsame_(range, "A"); + vals = lsame_(range, "V"); + inds = lsame_(range, "I"); + + *info = 0; + if (! lsame_(jobu, "V") && ! lsame_(jobu, "N")) { + *info = -1; + } else if (! lsame_(jobvt, "V") && ! lsame_(jobvt, + "N")) { + *info = -2; + } else if (! (alls || vals || inds)) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*m > *lda) { + *info = -7; + } else if (minmn > 0) { + if (vals) { + if (*vl < 0.f) { + *info = -8; + } else if (*vu <= *vl) { + *info = -9; + } + } else if (inds) { + if (*il < 1 || *il > f2cmax(1,minmn)) { + *info = -10; + } else if (*iu < f2cmin(minmn,*il) || *iu > minmn) { + *info = -11; + } + } + if (*info == 0) { + if (wantu && *ldu < *m) { + *info = -15; + } else if (wantvt) { + if (inds) { + if (*ldvt < *iu - *il + 1) { + *info = -17; + } + } else if (*ldvt < minmn) { + *info = -17; + } + } + } + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (minmn > 0) { + if (*m >= *n) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + if (*m >= mnthr) { + +/* Path 1 (M much larger than N) */ + + maxwrk = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * (*n + 5) + (*n << 1) * ilaenv_( + &c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * + ilaenv_(&c__1, "SORMQR", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * (*n * 3 + 6) + *n * + ilaenv_(&c__1, "SORMLQ", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + minwrk = *n * (*n * 3 + 20); + } else { + +/* Path 2 (M at least N, but not much larger) */ + + maxwrk = (*n << 2) + (*m + *n) * ilaenv_(&c__1, "SGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * + ilaenv_(&c__1, "SORMQR", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * ((*n << 1) + 5) + *n * + ilaenv_(&c__1, "SORMLQ", " ", n, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } +/* Computing MAX */ + i__2 = *n * ((*n << 1) + 19), i__3 = (*n << 2) + *m; + minwrk = f2cmax(i__2,i__3); + } + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + if (*n >= mnthr) { + +/* Path 1t (N much larger than M) */ + + maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * (*m + 5) + (*m << 1) * ilaenv_( + &c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * + ilaenv_(&c__1, "SORMQR", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * (*m * 3 + 6) + *m * + ilaenv_(&c__1, "SORMLQ", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + minwrk = *m * (*m * 3 + 20); + } else { + +/* Path 2t (N at least M, but not much larger) */ + + maxwrk = (*m << 2) + (*m + *n) * ilaenv_(&c__1, "SGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wantu) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * + ilaenv_(&c__1, "SORMQR", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } + if (wantvt) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * ((*m << 1) + 5) + *m * + ilaenv_(&c__1, "SORMLQ", " ", m, m, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__2,i__3); + } +/* Computing MAX */ + i__2 = *m * ((*m << 1) + 19), i__3 = (*m << 2) + *n; + minwrk = f2cmax(i__2,i__3); + } + } + } + maxwrk = f2cmax(maxwrk,minwrk); + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -19; + } + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("SGESVDX", &i__2, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set singular values indices accord to RANGE. */ + + if (alls) { + *(unsigned char *)rngtgk = 'I'; + iltgk = 1; + iutgk = f2cmin(*m,*n); + } else if (inds) { + *(unsigned char *)rngtgk = 'I'; + iltgk = *il; + iutgk = *iu; + } else { + *(unsigned char *)rngtgk = 'V'; + iltgk = 0; + iutgk = 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + } else if (anrm > bignum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce A using the QR */ +/* decomposition. */ + + if (*m >= mnthr) { + +/* Path 1 (M much larger than N): */ +/* A = Q * R = Q * ( QB * B * PB**T ) */ +/* = Q * ( QB * ( UB * S * VB**T ) * PB**T ) */ +/* U = Q * QB * UB; V**T = VB**T * PB**T */ + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + itau = 1; + itemp = itau + *n; + i__2 = *lwork - itemp + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2, + info); + +/* Copy R into WORK and bidiagonalize it: */ +/* (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB) */ + + iqrf = itemp; + id = iqrf + *n * *n; + ie = id + *n; + itauq = ie + *n; + itaup = itauq + *n; + itemp = itaup + *n; + slacpy_("U", n, n, &a[a_offset], lda, &work[iqrf], n); + i__2 = *n - 1; + i__3 = *n - 1; + slaset_("L", &i__2, &i__3, &c_b109, &c_b109, &work[iqrf + 1], n); + i__2 = *lwork - itemp + 1; + sgebrd_(n, n, &work[iqrf], n, &work[id], &work[ie], &work[itauq], + &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 14*N + 2*N*(N+1)) */ + + itgkz = itemp; + itemp = itgkz + *n * ((*n << 1) + 1); + i__2 = *n << 1; + sbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *n << 1; + } + i__2 = *m - *n; + slaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1], + ldu); + +/* Call SORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("Q", "L", "N", n, ns, n, &work[iqrf], n, &work[itauq], + &u[u_offset], ldu, &work[itemp], &i__2, info); + +/* Call SORMQR to compute Q*(QB*UB). */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + sormqr_("L", "N", m, ns, n, &a[a_offset], lda, &work[itau], & + u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *n; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *n << 1; + } + +/* Call SORMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("P", "R", "T", ns, n, n, &work[iqrf], n, &work[itaup], + &vt[vt_offset], ldvt, &work[itemp], &i__2, info); + } + } else { + +/* Path 2 (M at least N, but not much larger) */ +/* Reduce A to bidiagonal form without QR decomposition */ +/* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */ +/* U = QB * UB; V**T = VB**T * PB**T */ + +/* Bidiagonalize A */ +/* (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB) */ + + id = 1; + ie = id + *n; + itauq = ie + *n; + itaup = itauq + *n; + itemp = itaup + *n; + i__2 = *lwork - itemp + 1; + sgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[ + itauq], &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 14*N + 2*N*(N+1)) */ + + itgkz = itemp; + itemp = itgkz + *n * ((*n << 1) + 1); + i__2 = *n << 1; + sbdsvdx_("U", jobz, rngtgk, n, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(n, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *n << 1; + } + i__2 = *m - *n; + slaset_("A", &i__2, ns, &c_b109, &c_b109, &u[*n + 1 + u_dim1], + ldu); + +/* Call SORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[itemp], &i__2, &ierr); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *n; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(n, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *n << 1; + } + +/* Call SORMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need N, prefer N*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("P", "R", "T", ns, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, & + ierr); + } + } + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce A using the LQ decomposition. */ + + if (*n >= mnthr) { + +/* Path 1t (N much larger than M): */ +/* A = L * Q = ( QB * B * PB**T ) * Q */ +/* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q */ +/* U = QB * UB ; V**T = VB**T * PB**T * Q */ + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + itau = 1; + itemp = itau + *m; + i__2 = *lwork - itemp + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[itemp], &i__2, + info); +/* Copy L into WORK and bidiagonalize it: */ +/* (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB) */ + + ilqf = itemp; + id = ilqf + *m * *m; + ie = id + *m; + itauq = ie + *m; + itaup = itauq + *m; + itemp = itaup + *m; + slacpy_("L", m, m, &a[a_offset], lda, &work[ilqf], m); + i__2 = *m - 1; + i__3 = *m - 1; + slaset_("U", &i__2, &i__3, &c_b109, &c_b109, &work[ilqf + *m], m); + i__2 = *lwork - itemp + 1; + sgebrd_(m, m, &work[ilqf], m, &work[id], &work[ie], &work[itauq], + &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*M*M+14*M) */ + + itgkz = itemp; + itemp = itgkz + *m * ((*m << 1) + 1); + i__2 = *m << 1; + sbdsvdx_("U", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *m << 1; + } + +/* Call SORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("Q", "L", "N", m, ns, m, &work[ilqf], m, &work[itauq], + &u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *m; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *m << 1; + } + i__2 = *n - *m; + slaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * + vt_dim1 + 1], ldvt); + +/* Call SORMBR to compute (VB**T)*(PB**T) */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("P", "R", "T", ns, m, m, &work[ilqf], m, &work[itaup], + &vt[vt_offset], ldvt, &work[itemp], &i__2, info); + +/* Call SORMLQ to compute ((VB**T)*(PB**T))*Q. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + sormlq_("R", "N", ns, n, m, &a[a_offset], lda, &work[itau], & + vt[vt_offset], ldvt, &work[itemp], &i__2, info); + } + } else { + +/* Path 2t (N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ +/* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T */ +/* U = QB * UB; V**T = VB**T * PB**T */ + +/* Bidiagonalize A */ +/* (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB) */ + + id = 1; + ie = id + *m; + itauq = ie + *m; + itaup = itauq + *m; + itemp = itaup + *m; + i__2 = *lwork - itemp + 1; + sgebrd_(m, n, &a[a_offset], lda, &work[id], &work[ie], &work[ + itauq], &work[itaup], &work[itemp], &i__2, info); + +/* Solve eigenvalue problem TGK*Z=Z*S. */ +/* (Workspace: need 2*M*M+14*M) */ + + itgkz = itemp; + itemp = itgkz + *m * ((*m << 1) + 1); + i__2 = *m << 1; + sbdsvdx_("L", jobz, rngtgk, m, &work[id], &work[ie], vl, vu, & + iltgk, &iutgk, ns, &s[1], &work[itgkz], &i__2, &work[ + itemp], &iwork[1], info); + +/* If needed, compute left singular vectors. */ + + if (wantu) { + j = itgkz; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(m, &work[j], &c__1, &u[i__ * u_dim1 + 1], &c__1); + j += *m << 1; + } + +/* Call SORMBR to compute QB*UB. */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("Q", "L", "N", m, ns, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[itemp], &i__2, info); + } + +/* If needed, compute right singular vectors. */ + + if (wantvt) { + j = itgkz + *m; + i__2 = *ns; + for (i__ = 1; i__ <= i__2; ++i__) { + scopy_(m, &work[j], &c__1, &vt[i__ + vt_dim1], ldvt); + j += *m << 1; + } + i__2 = *n - *m; + slaset_("A", ns, &i__2, &c_b109, &c_b109, &vt[(*m + 1) * + vt_dim1 + 1], ldvt); + +/* Call SORMBR to compute VB**T * PB**T */ +/* (Workspace in WORK( ITEMP ): need M, prefer M*NB) */ + + i__2 = *lwork - itemp + 1; + sormbr_("P", "R", "T", ns, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[itemp], &i__2, + info); + } + } + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (real) maxwrk; + + return 0; + +/* End of SGESVDX */ + +} /* sgesvdx_ */ + diff --git a/lapack-netlib/SRC/sgesvj.c b/lapack-netlib/SRC/sgesvj.c new file mode 100644 index 000000000..32f49143c --- /dev/null +++ b/lapack-netlib/SRC/sgesvj.c @@ -0,0 +1,2210 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGESVJ */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESVJ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, */ +/* LDV, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N */ +/* CHARACTER*1 JOBA, JOBU, JOBV */ +/* REAL A( LDA, * ), SVA( N ), V( LDV, * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESVJ computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, where M >= N. The SVD of A is written as */ +/* > [++] [xx] [x0] [xx] */ +/* > A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] */ +/* > [++] [xx] */ +/* > where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */ +/* > matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */ +/* > of SIGMA are the singular values of A. The columns of U and V are the */ +/* > left and the right singular vectors of A, respectively. */ +/* > SGESVJ can sometimes compute tiny singular values and their singular vectors much */ +/* > more accurately than other SVD routines, see below under Further Details. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBA */ +/* > \verbatim */ +/* > JOBA is CHARACTER*1 */ +/* > Specifies the structure of A. */ +/* > = 'L': The input matrix A is lower triangular; */ +/* > = 'U': The input matrix A is upper triangular; */ +/* > = 'G': The input matrix A is general M-by-N matrix, M >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies whether to compute the left singular vectors */ +/* > (columns of U): */ +/* > = 'U': The left singular vectors corresponding to the nonzero */ +/* > singular values are computed and returned in the leading */ +/* > columns of A. See more details in the description of A. */ +/* > The default numerical orthogonality threshold is set to */ +/* > approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=SLAMCH('E'). */ +/* > = 'C': Analogous to JOBU='U', except that user can control the */ +/* > level of numerical orthogonality of the computed left */ +/* > singular vectors. TOL can be set to TOL = CTOL*EPS, where */ +/* > CTOL is given on input in the array WORK. */ +/* > No CTOL smaller than ONE is allowed. CTOL greater */ +/* > than 1 / EPS is meaningless. The option 'C' */ +/* > can be used if M*EPS is satisfactory orthogonality */ +/* > of the computed left singular vectors, so CTOL=M could */ +/* > save few sweeps of Jacobi rotations. */ +/* > See the descriptions of A and WORK(1). */ +/* > = 'N': The matrix U is not computed. However, see the */ +/* > description of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether to compute the right singular vectors, that */ +/* > is, the matrix V: */ +/* > = 'V': the matrix V is computed and returned in the array V */ +/* > = 'A': the Jacobi rotations are applied to the MV-by-N */ +/* > array V. In other words, the right singular vector */ +/* > matrix V is not computed explicitly; instead it is */ +/* > applied to an MV-by-N matrix initially stored in the */ +/* > first MV rows of V. */ +/* > = 'N': the matrix V is not computed and the array V is not */ +/* > referenced */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. 1/SLAMCH('E') > M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > If JOBU = 'U' .OR. JOBU = 'C': */ +/* > If INFO = 0: */ +/* > RANKA orthonormal columns of U are returned in the */ +/* > leading RANKA columns of the array A. Here RANKA <= N */ +/* > is the number of computed singular values of A that are */ +/* > above the underflow threshold SLAMCH('S'). The singular */ +/* > vectors corresponding to underflowed or zero singular */ +/* > values are not computed. The value of RANKA is returned */ +/* > in the array WORK as RANKA=NINT(WORK(2)). Also see the */ +/* > descriptions of SVA and WORK. The computed columns of U */ +/* > are mutually numerically orthogonal up to approximately */ +/* > TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'), */ +/* > see the description of JOBU. */ +/* > If INFO > 0, */ +/* > the procedure SGESVJ did not converge in the given number */ +/* > of iterations (sweeps). In that case, the computed */ +/* > columns of U may not be orthogonal up to TOL. The output */ +/* > U (stored in A), SIGMA (given by the computed singular */ +/* > values in SVA(1:N)) and V is still a decomposition of the */ +/* > input matrix A in the sense that the residual */ +/* > ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. */ +/* > If JOBU = 'N': */ +/* > If INFO = 0: */ +/* > Note that the left singular vectors are 'for free' in the */ +/* > one-sided Jacobi SVD algorithm. However, if only the */ +/* > singular values are needed, the level of numerical */ +/* > orthogonality of U is not an issue and iterations are */ +/* > stopped when the columns of the iterated matrix are */ +/* > numerically orthogonal up to approximately M*EPS. Thus, */ +/* > on exit, A contains the columns of U scaled with the */ +/* > corresponding singular values. */ +/* > If INFO > 0: */ +/* > the procedure SGESVJ did not converge in the given number */ +/* > of iterations (sweeps). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SVA */ +/* > \verbatim */ +/* > SVA is REAL array, dimension (N) */ +/* > On exit, */ +/* > If INFO = 0 : */ +/* > depending on the value SCALE = WORK(1), we have: */ +/* > If SCALE = ONE: */ +/* > SVA(1:N) contains the computed singular values of A. */ +/* > During the computation SVA contains the Euclidean column */ +/* > norms of the iterated matrices in the array A. */ +/* > If SCALE .NE. ONE: */ +/* > The singular values of A are SCALE*SVA(1:N), and this */ +/* > factored representation is due to the fact that some of the */ +/* > singular values of A might underflow or overflow. */ +/* > */ +/* > If INFO > 0 : */ +/* > the procedure SGESVJ did not converge in the given number of */ +/* > iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then the product of Jacobi rotations in SGESVJ */ +/* > is applied to the first MV rows of V. See the description of JOBV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,N) */ +/* > If JOBV = 'V', then V contains on exit the N-by-N matrix of */ +/* > the right singular vectors; */ +/* > If JOBV = 'A', then V contains the product of the computed right */ +/* > singular vector matrix and the initial matrix in */ +/* > the array V. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', then LDV >= f2cmax(1,N). */ +/* > If JOBV = 'A', then LDV >= f2cmax(1,MV) . */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > On entry, */ +/* > If JOBU = 'C' : */ +/* > WORK(1) = CTOL, where CTOL defines the threshold for convergence. */ +/* > The process stops if all columns of A are mutually */ +/* > orthogonal up to CTOL*EPS, EPS=SLAMCH('E'). */ +/* > It is required that CTOL >= ONE, i.e. it is not */ +/* > allowed to force the routine to obtain orthogonality */ +/* > below EPSILON. */ +/* > On exit, */ +/* > WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */ +/* > are the computed singular vcalues of A. */ +/* > (See description of SVA().) */ +/* > WORK(2) = NINT(WORK(2)) is the number of the computed nonzero */ +/* > singular values. */ +/* > WORK(3) = NINT(WORK(3)) is the number of the computed singular */ +/* > values that are larger than the underflow threshold. */ +/* > WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi */ +/* > rotations needed for numerical convergence. */ +/* > WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */ +/* > This is useful information in cases when SGESVJ did */ +/* > not converge, as it can be used to estimate whether */ +/* > the output is still useful and for post festum analysis. */ +/* > WORK(6) = the largest absolute value over all sines of the */ +/* > Jacobi rotation angles in the last sweep. It can be */ +/* > useful for a post festum analysis. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > length of WORK, WORK >= MAX(6,M+N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > > 0: SGESVJ did not converge in the maximal allowed number (30) */ +/* > of sweeps. The output may still be useful. See the */ +/* > description of WORK. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */ +/* > rotations. The rotations are implemented as fast scaled rotations of */ +/* > Anda and Park [1]. In the case of underflow of the Jacobi angle, a */ +/* > modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses */ +/* > column interchanges of de Rijk [2]. The relative accuracy of the computed */ +/* > singular values and the accuracy of the computed singular vectors (in */ +/* > angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. */ +/* > The condition number that determines the accuracy in the full rank case */ +/* > is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */ +/* > spectral condition number. The best performance of this Jacobi SVD */ +/* > procedure is achieved if used in an accelerated version of Drmac and */ +/* > Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. */ +/* > Some tunning parameters (marked with [TP]) are available for the */ +/* > implementer. \n */ +/* > The computational range for the nonzero singular values is the machine */ +/* > number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */ +/* > denormalized singular values can be computed with the corresponding */ +/* > gradual loss of accurate digits. */ +/* > */ +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ +/* > */ +/* > \par References: */ +/* ================ */ +/* > */ +/* > [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. \n */ +/* > SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. \n\n */ +/* > [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */ +/* > singular value decomposition on a vector computer. \n */ +/* > SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. \n\n */ +/* > [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. \n */ +/* > [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular */ +/* > value computation in floating point arithmetic. \n */ +/* > SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. \n\n */ +/* > [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. \n */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. \n */ +/* > LAPACK Working note 169. \n\n */ +/* > [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. \n */ +/* > SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. \n */ +/* > LAPACK Working note 170. \n\n */ +/* > [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */ +/* > QSVD, (H,K)-SVD computations.\n */ +/* > Department of Mathematics, University of Zagreb, 2008. */ +/* > */ +/* > \par Bugs, Examples and Comments: */ +/* ================================= */ +/* > */ +/* > Please report all bugs and send interesting test examples and comments to */ +/* > drmac@math.hr. Thank you. */ + +/* ===================================================================== */ +/* Subroutine */ int sgesvj_(char *joba, char *jobu, char *jobv, integer *m, + integer *n, real *a, integer *lda, real *sva, integer *mv, real *v, + integer *ldv, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2; + + /* Local variables */ + real aapp, aapq, aaqq, ctol; + integer ierr; + real bigtheta; + extern real sdot_(integer *, real *, integer *, real *, integer *); + integer pskipped; + real aapp0, temp1; + extern real snrm2_(integer *, real *, integer *); + integer i__, p, q; + real t, large, apoaq, aqoap; + extern logical lsame_(char *, char *); + real theta; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + real small, sfmin; + logical lsvec; + real fastr[5], epsln; + logical applv, rsvec, uctol, lower, upper; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical rotok; + integer n2; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *); + integer n4; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *), srotm_(integer *, real *, integer *, real *, + integer *, real *); + real rootsfmin; + extern /* Subroutine */ int sgsvj0_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, real *, integer *, real *, integer *, integer *), + sgsvj1_(char *, integer *, integer *, integer *, real *, integer * + , real *, real *, integer *, real *, integer *, real *, real *, + real *, integer *, real *, integer *, integer *); + integer n34; + real cs, sn; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + integer blskip; + real mxaapq; + extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, + real *, real *, integer *); + real thsign; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + real mxsinj; + integer ir1, emptsw, notrot, iswrot, jbc; + real big; + integer kbl, lkahead, igl, ibr, jgl, nbl; + real skl; + logical goscale; + real tol; + integer mvl; + logical noscale; + real rootbig, rooteps; + integer rowskip; + real roottol; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + +/* from BLAS */ +/* from LAPACK */ +/* from BLAS */ +/* from LAPACK */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + --sva; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + lsvec = lsame_(jobu, "U"); + uctol = lsame_(jobu, "C"); + rsvec = lsame_(jobv, "V"); + applv = lsame_(jobv, "A"); + upper = lsame_(joba, "U"); + lower = lsame_(joba, "L"); + + if (! (upper || lower || lsame_(joba, "G"))) { + *info = -1; + } else if (! (lsvec || uctol || lsame_(jobu, "N"))) + { + *info = -2; + } else if (! (rsvec || applv || lsame_(jobv, "N"))) + { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0 || *n > *m) { + *info = -5; + } else if (*lda < *m) { + *info = -7; + } else if (*mv < 0) { + *info = -9; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -11; + } else if (uctol && work[1] <= 1.f) { + *info = -12; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m + *n; + if (*lwork < f2cmax(i__1,6)) { + *info = -13; + } else { + *info = 0; + } + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESVJ", &i__1, (ftnlen)6); + return 0; + } + +/* #:) Quick return for void matrix */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Set numerical parameters */ +/* The stopping criterion for Jacobi rotations is */ + +/* max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS */ + +/* where EPS is the round-off and CTOL is defined as follows: */ + + if (uctol) { +/* ... user controlled */ + ctol = work[1]; + } else { +/* ... default */ + if (lsvec || rsvec || applv) { + ctol = sqrt((real) (*m)); + } else { + ctol = (real) (*m); + } + } +/* ... and the machine dependent parameters are */ +/* [!] (Make sure that SLAMCH() works properly on the target machine.) */ + + epsln = slamch_("Epsilon"); + rooteps = sqrt(epsln); + sfmin = slamch_("SafeMinimum"); + rootsfmin = sqrt(sfmin); + small = sfmin / epsln; + big = slamch_("Overflow"); +/* BIG = ONE / SFMIN */ + rootbig = 1.f / rootsfmin; + large = big / sqrt((real) (*m * *n)); + bigtheta = 1.f / rooteps; + + tol = ctol * epsln; + roottol = sqrt(tol); + + if ((real) (*m) * epsln >= 1.f) { + *info = -4; + i__1 = -(*info); + xerbla_("SGESVJ", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize the right singular vector matrix. */ + + if (rsvec) { + mvl = *n; + slaset_("A", &mvl, n, &c_b17, &c_b18, &v[v_offset], ldv); + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + +/* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */ +/* (!) If necessary, scale A to protect the largest singular value */ +/* from overflow. It is possible that saving the largest singular */ +/* value destroys the information about the small ones. */ +/* This initial scaling is almost minimal in the sense that the */ +/* goal is to make sure that no column norm overflows, and that */ +/* SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */ +/* in A are detected, the procedure returns with INFO=-6. */ + + skl = 1.f / sqrt((real) (*m) * (real) (*n)); + noscale = TRUE_; + goscale = TRUE_; + + if (lower) { +/* the input matrix is M-by-N lower triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.f; + aaqq = 1.f; + i__2 = *m - p + 1; + slassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("SGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L1873: */ + } + } + } +/* L1874: */ + } + } else if (upper) { +/* the input matrix is M-by-N upper triangular (trapezoidal) */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.f; + aaqq = 1.f; + slassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("SGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L2873: */ + } + } + } +/* L2874: */ + } + } else { +/* the input matrix is M-by-N general dense */ + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + aapp = 0.f; + aaqq = 1.f; + slassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq); + if (aapp > big) { + *info = -6; + i__2 = -(*info); + xerbla_("SGESVJ", &i__2, (ftnlen)6); + return 0; + } + aaqq = sqrt(aaqq); + if (aapp < big / aaqq && noscale) { + sva[p] = aapp * aaqq; + } else { + noscale = FALSE_; + sva[p] = aapp * (aaqq * skl); + if (goscale) { + goscale = FALSE_; + i__2 = p - 1; + for (q = 1; q <= i__2; ++q) { + sva[q] *= skl; +/* L3873: */ + } + } + } +/* L3874: */ + } + } + + if (noscale) { + skl = 1.f; + } + +/* Move the smaller part of the spectrum from the underflow threshold */ +/* (!) Start by determining the position of the nonzero entries of the */ +/* array SVA() relative to ( SFMIN, BIG ). */ + + aapp = 0.f; + aaqq = big; + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + if (sva[p] != 0.f) { +/* Computing MIN */ + r__1 = aaqq, r__2 = sva[p]; + aaqq = f2cmin(r__1,r__2); + } +/* Computing MAX */ + r__1 = aapp, r__2 = sva[p]; + aapp = f2cmax(r__1,r__2); +/* L4781: */ + } + +/* #:) Quick return for zero matrix */ + + if (aapp == 0.f) { + if (lsvec) { + slaset_("G", m, n, &c_b17, &c_b18, &a[a_offset], lda); + } + work[1] = 1.f; + work[2] = 0.f; + work[3] = 0.f; + work[4] = 0.f; + work[5] = 0.f; + work[6] = 0.f; + return 0; + } + +/* #:) Quick return for one-column matrix */ + + if (*n == 1) { + if (lsvec) { + slascl_("G", &c__0, &c__0, &sva[1], &skl, m, &c__1, &a[a_dim1 + 1] + , lda, &ierr); + } + work[1] = 1.f / skl; + if (sva[1] >= sfmin) { + work[2] = 1.f; + } else { + work[2] = 0.f; + } + work[3] = 0.f; + work[4] = 0.f; + work[5] = 0.f; + work[6] = 0.f; + return 0; + } + +/* Protect small singular values from underflow, and try to */ +/* avoid underflows/overflows in computing Jacobi rotations. */ + + sn = sqrt(sfmin / epsln); + temp1 = sqrt(big / (real) (*n)); + if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) { +/* Computing MIN */ + r__1 = big, r__2 = temp1 / aapp; + temp1 = f2cmin(r__1,r__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp <= temp1) { +/* Computing MIN */ + r__1 = sn / aaqq, r__2 = big / (aapp * sqrt((real) (*n))); + temp1 = f2cmin(r__1,r__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq >= sn && aapp >= temp1) { +/* Computing MAX */ + r__1 = sn / aaqq, r__2 = temp1 / aapp; + temp1 = f2cmax(r__1,r__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else if (aaqq <= sn && aapp >= temp1) { +/* Computing MIN */ + r__1 = sn / aaqq, r__2 = big / (sqrt((real) (*n)) * aapp); + temp1 = f2cmin(r__1,r__2); +/* AAQQ = AAQQ*TEMP1 */ +/* AAPP = AAPP*TEMP1 */ + } else { + temp1 = 1.f; + } + +/* Scale, if necessary */ + + if (temp1 != 1.f) { + slascl_("G", &c__0, &c__0, &c_b18, &temp1, n, &c__1, &sva[1], n, & + ierr); + } + skl = temp1 * skl; + if (skl != 1.f) { + slascl_(joba, &c__0, &c__0, &c_b18, &skl, m, n, &a[a_offset], lda, & + ierr); + skl = 1.f / skl; + } + +/* Row-cyclic Jacobi SVD algorithm with column pivoting */ + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + fastr[0] = 0.f; + +/* A is represented in factored form A = A * diag(WORK), where diag(WORK) */ +/* is initialized to identity. WORK is updated during fast scaled */ +/* rotations. */ + + i__1 = *n; + for (q = 1; q <= i__1; ++q) { + work[q] = 1.f; +/* L1868: */ + } + + + swband = 3; +/* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */ +/* if SGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure */ +/* works on pivots inside a band-like region around the diagonal. */ +/* The boundaries are determined dynamically, based on the number of */ +/* pivots above a threshold. */ + + kbl = f2cmin(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } + +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + +/* Quasi block transformations, using the lower (upper) triangular */ +/* structure of the input matrix. The quasi-block-cycling usually */ +/* invokes cubic convergence. Big part of this cycle is done inside */ +/* canonical subspaces of dimensions less than M. */ + +/* Computing MAX */ + i__1 = 64, i__2 = kbl << 2; + if ((lower || upper) && *n > f2cmax(i__1,i__2)) { +/* [TP] The number of partition levels and the actual partition are */ +/* tuning parameters. */ + n4 = *n / 4; + n2 = *n / 2; + n34 = n4 * 3; + if (applv) { + q = 0; + } else { + q = 1; + } + + if (lower) { + +/* This works very well on lower triangular matrices, in particular */ +/* in the framework of the preconditioned Jacobi SVD (xGEJSV). */ +/* The idea is simple: */ +/* [+ 0 0 0] Note that Jacobi transformations of [0 0] */ +/* [+ + 0 0] [0 0] */ +/* [+ + x 0] actually work on [x 0] [x 0] */ +/* [+ + x x] [x x]. [x x] */ + + i__1 = *m - n34; + i__2 = *n - n34; + i__3 = *lwork - *n; + sgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda, + &work[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + ( + n34 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, & + work[*n + 1], &i__3, &ierr); + + i__1 = *m - n2; + i__2 = n34 - n2; + i__3 = *lwork - *n; + sgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, & + work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) + * v_dim1], ldv, &epsln, &sfmin, &tol, &c__2, &work[*n + + 1], &i__3, &ierr); + + i__1 = *m - n2; + i__2 = *n - n2; + i__3 = *lwork - *n; + sgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], + lda, &work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + ( + n2 + 1) * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, & + work[*n + 1], &i__3, &ierr); + + i__1 = *m - n4; + i__2 = n2 - n4; + i__3 = *lwork - *n; + sgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, & + work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) + * v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + + 1], &i__3, &ierr); + + i__1 = *lwork - *n; + sgsvj0_(jobv, m, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, + &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + sgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], & + mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, & + work[*n + 1], &i__1, &ierr); + + + } else if (upper) { + + + i__1 = *lwork - *n; + sgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &work[1], &sva[1], & + mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__2, & + work[*n + 1], &i__1, &ierr); + + i__1 = *lwork - *n; + sgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &work[n4 + + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * + v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], + &i__1, &ierr); + + i__1 = *lwork - *n; + sgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], + &mvl, &v[v_offset], ldv, &epsln, &sfmin, &tol, &c__1, & + work[*n + 1], &i__1, &ierr); + + i__1 = n2 + n4; + i__2 = *lwork - *n; + sgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &work[ + n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * + v_dim1], ldv, &epsln, &sfmin, &tol, &c__1, &work[*n + 1], + &i__2, &ierr); + } + + } + + + for (i__ = 1; i__ <= 30; ++i__) { + + + mxaapq = 0.f; + mxsinj = 0.f; + iswrot = 0; + + notrot = 0; + pskipped = 0; + +/* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */ +/* 1 <= p < q <= N. This is the first step toward a blocked implementation */ +/* of the rotations. New implementation, based on block transformations, */ +/* is under development. */ + + i__1 = nbl; + for (ibr = 1; ibr <= i__1; ++ibr) { + + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__3 = lkahead, i__4 = nbl - ibr; + i__2 = f2cmin(i__3,i__4); + for (ir1 = 0; ir1 <= i__2; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__4 = igl + kbl - 1, i__5 = *n - 1; + i__3 = f2cmin(i__4,i__5); + for (p = igl; p <= i__3; ++p) { + + + i__4 = *n - p + 1; + q = isamax_(&i__4, &sva[p], &c__1) + p - 1; + if (p != q) { + sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = work[p]; + work[p] = work[q]; + work[q] = temp1; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. */ +/* Caveat: */ +/* Unfortunately, some BLAS implementations compute SNRM2(M,A(1,p),1) */ +/* as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to */ +/* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to */ +/* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */ +/* Hence, SNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. */ +/* If properly implemented SNRM2 is available, the IF-THEN-ELSE */ +/* below should read "AAPP = SNRM2( M, A(1,p), 1 ) * WORK(p)". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = snrm2_(m, &a[p * a_dim1 + 1], &c__1) * + work[p]; + } else { + temp1 = 0.f; + aapp = 1.f; + slassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp) * work[p]; + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.f) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (q = p + 1; q <= i__4; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.f) { + + aapp0 = aapp; + if (aaqq >= 1.f) { + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + scopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, & + work[p], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = sdot_(m, &work[*n + 1], &c__1, + &a[q * a_dim1 + 1], &c__1) * + work[q] / aaqq; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + scopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + slascl_("G", &c__0, &c__0, &aaqq, & + work[q], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = sdot_(m, &work[*n + 1], &c__1, + &a[p * a_dim1 + 1], &c__1) * + work[p] / aapp; + } + } + +/* Computing MAX */ + r__1 = mxaapq, r__2 = abs(aapq); + mxaapq = f2cmax(r__1,r__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > tol) { + +/* [RTD] ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq; + + if (abs(theta) > bigtheta) { + + t = .5f / theta; + fastr[2] = t * work[p] / work[q]; + fastr[3] = -t * work[q] / work[p]; + srotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + + } else { + + + thsign = -r_sign(&c_b18, &aapq); + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; + +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); + + apoaq = work[p] / work[q]; + aqoap = work[q] / work[p]; + if (work[p] >= 1.f) { + if (work[q] >= 1.f) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + work[p] *= cs; + work[q] *= cs; + srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + } + } else { + if (work[q] >= 1.f) { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + } else { + if (work[p] >= work[q]) { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + scopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, & + c_b18, m, &c__1, &work[*n + 1] + , lda, &ierr); + slascl_("G", &c__0, &c__0, &aaqq, & + c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[p] / work[q]; + saxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1); + slascl_("G", &c__0, &c__0, &c_b18, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * aapq; + sva[q] = aaqq * sqrt((f2cmax(r__1,r__2))) + ; + mxsinj = f2cmax(mxsinj,sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). */ + +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = snrm2_(m, &a[q * a_dim1 + + 1], &c__1) * work[q]; + } else { + t = 0.f; + aaqq = 1.f; + slassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * work[q]; + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = snrm2_(m, &a[p * a_dim1 + + 1], &c__1) * work[p]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * work[p]; + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + + sva[p] = aapp; + + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.f) { +/* Computing MIN */ + i__4 = igl + kbl - 1; + notrot = notrot + f2cmin(i__4,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__2 = nbl; + for (jbc = ibr + 1; jbc <= i__2; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + + aapp = sva[p]; + if (aapp > 0.f) { + + pskipped = 0; + +/* Computing MIN */ + i__5 = jgl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (q = jgl; q <= i__4; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.f) { + aapp0 = aapp; + + +/* Safe Gram matrix computation */ + + if (aaqq >= 1.f) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + scopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, & + work[p], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = sdot_(m, &work[*n + 1], &c__1, + &a[q * a_dim1 + 1], &c__1) * + work[q] / aaqq; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * work[p] * work[q] / + aaqq / aapp; + } else { + scopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[*n + 1], &c__1); + slascl_("G", &c__0, &c__0, &aaqq, & + work[q], m, &c__1, &work[*n + + 1], lda, &ierr); + aapq = sdot_(m, &work[*n + 1], &c__1, + &a[p * a_dim1 + 1], &c__1) * + work[p] / aapp; + } + } + +/* Computing MAX */ + r__1 = mxaapq, r__2 = abs(aapq); + mxaapq = f2cmax(r__1,r__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > tol) { + notrot = 0; +/* [RTD] ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5f / theta; + fastr[2] = t * work[p] / work[q]; + fastr[3] = -t * work[q] / work[p]; + srotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + } else { + + + thsign = -r_sign(&c_b18, &aapq); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); + + apoaq = work[p] / work[q]; + aqoap = work[q] / work[p]; + if (work[p] >= 1.f) { + + if (work[q] >= 1.f) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + work[p] *= cs; + work[q] *= cs; + srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + work[p] *= cs; + work[q] /= cs; + } + } else { + if (work[q] >= 1.f) { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + work[p] /= cs; + work[q] *= cs; + } else { + if (work[p] >= work[q]) { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + work[p] *= cs; + work[q] /= cs; + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + work[p] /= cs; + work[q] *= cs; + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + if (aapp > aaqq) { + scopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[*n + 1], & + c__1); + slascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &work[* + n + 1], lda, &ierr); + slascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[p] / work[q]; + saxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1); + slascl_("G", &c__0, &c__0, &c_b18, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * + aapq; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,sfmin); + } else { + scopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[*n + 1], & + c__1); + slascl_("G", &c__0, &c__0, &aaqq, + &c_b18, m, &c__1, &work[* + n + 1], lda, &ierr); + slascl_("G", &c__0, &c__0, &aapp, + &c_b18, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * work[q] / work[p]; + saxpy_(m, &temp1, &work[*n + 1], & + c__1, &a[p * a_dim1 + 1], + &c__1); + slascl_("G", &c__0, &c__0, &c_b18, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * + aapq; + sva[p] = aapp * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q) */ +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = snrm2_(m, &a[q * a_dim1 + + 1], &c__1) * work[q]; + } else { + t = 0.f; + aaqq = 1.f; + slassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * work[q]; + } + } +/* Computing 2nd power */ + r__1 = aapp / aapp0; + if (r__1 * r__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = snrm2_(m, &a[p * a_dim1 + + 1], &c__1) * work[p]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * work[p]; + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* [RTD] SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + + if (aapp == 0.f) { +/* Computing MIN */ + i__4 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__4,*n) - jgl + 1; + } + if (aapp < 0.f) { + notrot = 0; + } + + } + +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__3 = igl + kbl - 1; + i__2 = f2cmin(i__3,*n); + for (p = igl; p <= i__2; ++p) { + sva[p] = (r__1 = sva[p], abs(r__1)); +/* L2012: */ + } +/* ** */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = snrm2_(m, &a[*n * a_dim1 + 1], &c__1) * work[*n]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp) * work[*n]; + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < sqrt((real) (*n)) * tol && (real) (* + n) * mxaapq * mxsinj < tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } + +/* L1993: */ + } +/* end i=1:NSWEEP loop */ + +/* #:( Reaching this point means that the procedure has not converged. */ + *info = 29; + goto L1995; + +L1994: +/* #:) Reaching this point means numerical convergence after the i-th */ +/* sweep. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the singular values and find how many are above */ +/* the underflow threshold. */ + + n2 = 0; + n4 = 0; + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = isamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = work[p]; + work[p] = work[q]; + work[q] = temp1; + sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } + if (sva[p] != 0.f) { + ++n4; + if (sva[p] * skl > sfmin) { + ++n2; + } + } +/* L5991: */ + } + if (sva[*n] != 0.f) { + ++n4; + if (sva[*n] * skl > sfmin) { + ++n2; + } + } + +/* Normalize the left singular vectors. */ + + if (lsvec || uctol) { + i__1 = n2; + for (p = 1; p <= i__1; ++p) { + r__1 = work[p] / sva[p]; + sscal_(m, &r__1, &a[p * a_dim1 + 1], &c__1); +/* L1998: */ + } + } + +/* Scale the product of Jacobi rotations (assemble the fast rotations). */ + + if (rsvec) { + if (applv) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + sscal_(&mvl, &work[p], &v[p * v_dim1 + 1], &c__1); +/* L2398: */ + } + } else { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + temp1 = 1.f / snrm2_(&mvl, &v[p * v_dim1 + 1], &c__1); + sscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1); +/* L2399: */ + } + } + } + +/* Undo scaling, if necessary (and possible). */ + if (skl > 1.f && sva[1] < big / skl || skl < 1.f && sva[f2cmax(n2,1)] > + sfmin / skl) { + i__1 = *n; + for (p = 1; p <= i__1; ++p) { + sva[p] = skl * sva[p]; +/* L2400: */ + } + skl = 1.f; + } + + work[1] = skl; +/* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE */ +/* then some of the singular values may overflow or underflow and */ +/* the spectrum is given in this factored representation. */ + + work[2] = (real) n4; +/* N4 is the number of computed nonzero singular values of A. */ + + work[3] = (real) n2; +/* N2 is the number of singular values of A greater than SFMIN. */ +/* If N2 +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGESVX computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS */ +/* REAL RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ BERR( * ), C( * ), FERR( * ), R( * ), */ +/* $ WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESVX uses the LU factorization to compute the solution to a real */ +/* > system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* > the system: */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the */ +/* > matrix A (after equilibration if FACT = 'E') as */ +/* > A = P * L * U, */ +/* > where P is a permutation matrix, L is a unit lower triangular */ +/* > matrix, and U is upper triangular. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ +/* > not 'N', then A must have been equilibrated by the scaling */ +/* > factors in R and/or C. A is not modified if FACT = 'F' or */ +/* > 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is REAL array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the factors L and U from the factorization */ +/* > A = P*L*U as computed by SGETRF. If EQUED .ne. 'N', then */ +/* > AF is the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the equilibrated matrix A (see the description of A for */ +/* > the form of the equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > as computed by SGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is REAL array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X */ +/* > to the original system of equations. Note that A and B are */ +/* > modified on exit if EQUED .ne. 'N', and the solution to the */ +/* > equilibrated system is inv(diag(C))*X if TRANS = 'N' and */ +/* > EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' */ +/* > and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A after equilibration (if done). If RCOND is less than the */ +/* > machine precision (in particular, if RCOND = 0), the matrix */ +/* > is singular to working precision. This condition is */ +/* > indicated by a return code of INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (4*N) */ +/* > On exit, WORK(1) contains the reciprocal pivot growth */ +/* > factor norm(A)/norm(U). The "f2cmax absolute element" norm is */ +/* > used. If WORK(1) is much less than 1, then the stability */ +/* > of the LU factorization of the (equilibrated) matrix A */ +/* > could be poor. This also means that the solution X, condition */ +/* > estimator RCOND, and forward error bound FERR could be */ +/* > unreliable. If factorization fails with 0 WORK(1) contains the reciprocal pivot growth factor for the */ +/* > leading INFO columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: U(i,i) is exactly zero. The factorization has */ +/* > been completed, but the factor U is exactly */ +/* > singular, so the solution and error bounds */ +/* > could not be computed. RCOND = 0 is returned. */ +/* > = N+1: U is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup realGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgesvx_(char *fact, char *trans, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, + integer *ldx, real *rcond, real *ferr, real *berr, real *work, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + real amax; + char norm[1]; + integer i__, j; + extern logical lsame_(char *, char *); + real rcmin, rcmax, anorm; + logical equil; + real colcnd; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + logical nofact; + extern /* Subroutine */ int slaqge_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, char *), + xerbla_(char *, integer *, ftnlen), sgecon_(char *, integer *, + real *, integer *, real *, real *, real *, integer *, integer *); + real bignum; + integer infequ; + logical colequ; + extern /* Subroutine */ int sgeequ_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *), sgerfs_( + char *, integer *, integer *, real *, integer *, real *, integer * + , integer *, real *, integer *, real *, integer *, real *, real *, + real *, integer *, integer *), sgetrf_(integer *, + integer *, real *, integer *, integer *, integer *); + real rowcnd; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + logical notran; + extern real slantr_(char *, char *, char *, integer *, integer *, real *, + integer *, real *); + extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, + integer *, integer *, real *, integer *, integer *); + real smlnum; + logical rowequ; + real rpvgrw; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + smlnum = slamch_("Safe minimum"); + bignum = 1.f / smlnum; + } + +/* Test the input parameters. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + r__1 = rcmin, r__2 = r__[j]; + rcmin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = rcmax, r__2 = r__[j]; + rcmax = f2cmax(r__1,r__2); +/* L10: */ + } + if (rcmin <= 0.f) { + *info = -11; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.f; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + r__1 = rcmin, r__2 = c__[j]; + rcmin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = rcmax, r__2 = c__[j]; + rcmax = f2cmax(r__1,r__2); +/* L20: */ + } + if (rcmin <= 0.f) { + *info = -12; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.f; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESVX", &i__1, (ftnlen)6); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + sgeequ_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, & + amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + slaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + } + +/* Scale the right hand side. */ + + if (notran) { + if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = r__[i__] * b[i__ + j * b_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = c__[i__] * b[i__ + j * b_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + slacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + sgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + rpvgrw = slantr_("M", "U", "N", info, info, &af[af_offset], ldaf, + &work[1]); + if (rpvgrw == 0.f) { + rpvgrw = 1.f; + } else { + rpvgrw = slange_("M", n, info, &a[a_offset], lda, &work[1]) / rpvgrw; + } + work[1] = rpvgrw; + *rcond = 0.f; + return 0; + } + } + +/* Compute the norm of the matrix A and the */ +/* reciprocal pivot growth factor RPVGRW. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = slange_(norm, n, n, &a[a_offset], lda, &work[1]); + rpvgrw = slantr_("M", "U", "N", n, n, &af[af_offset], ldaf, &work[1]); + if (rpvgrw == 0.f) { + rpvgrw = 1.f; + } else { + rpvgrw = slange_("M", n, n, &a[a_offset], lda, &work[1]) / + rpvgrw; + } + +/* Compute the reciprocal of the condition number of A. */ + + sgecon_(norm, n, &af[af_offset], ldaf, &anorm, rcond, &work[1], &iwork[1], + info); + +/* Compute the solution matrix X. */ + + slacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + sgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + sgerfs_(trans, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, &ipiv[1], + &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], &berr[1], &work[ + 1], &iwork[1], info); + +/* Transform the solution matrix X to a solution of the original */ +/* system. */ + + if (notran) { + if (colequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = c__[i__] * x[i__ + j * x_dim1]; +/* L70: */ + } +/* L80: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= colcnd; +/* L90: */ + } + } + } else if (rowequ) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__ + j * x_dim1] = r__[i__] * x[i__ + j * x_dim1]; +/* L100: */ + } +/* L110: */ + } + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] /= rowcnd; +/* L120: */ + } + } + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < slamch_("Epsilon")) { + *info = *n + 1; + } + + work[1] = rpvgrw; + return 0; + +/* End of SGESVX */ + +} /* sgesvx_ */ + diff --git a/lapack-netlib/SRC/sgesvxx.c b/lapack-netlib/SRC/sgesvxx.c new file mode 100644 index 000000000..94de9b371 --- /dev/null +++ b/lapack-netlib/SRC/sgesvxx.c @@ -0,0 +1,1210 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGESVXX computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGESVXX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, */ +/* EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, */ +/* BERR, N_ERR_BNDS, ERR_BNDS_NORM, */ +/* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, */ +/* INFO ) */ + +/* CHARACTER EQUED, FACT, TRANS */ +/* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, */ +/* $ N_ERR_BNDS */ +/* REAL RCOND, RPVGRW */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ X( LDX , * ),WORK( * ) */ +/* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGESVXX uses the LU factorization to compute the solution to a */ +/* > real system of linear equations A * X = B, where A is an */ +/* > N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > If requested, both normwise and maximum componentwise error bounds */ +/* > are returned. SGESVXX will return a solution with a tiny */ +/* > guaranteed error (O(eps) where eps is the working machine */ +/* > precision) unless the matrix is very ill-conditioned, in which */ +/* > case a warning is returned. Relevant condition numbers also are */ +/* > calculated and returned. */ +/* > */ +/* > SGESVXX accepts user-provided factorizations and equilibration */ +/* > factors; see the definitions of the FACT and EQUED options. */ +/* > Solving with refinement and using a factorization from a previous */ +/* > SGESVXX call will also produce a solution with either O(eps) */ +/* > errors or warnings, but we cannot make that claim for general */ +/* > user-provided factorizations and equilibration factors if they */ +/* > differ from what SGESVXX would itself produce. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'E', real scaling factors are computed to equilibrate */ +/* > the system: */ +/* > */ +/* > TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B */ +/* > TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B */ +/* > TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B */ +/* > */ +/* > Whether or not the system will be equilibrated depends on the */ +/* > scaling of the matrix A, but if equilibration is used, A is */ +/* > overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') */ +/* > or diag(C)*B (if TRANS = 'T' or 'C'). */ +/* > */ +/* > 2. If FACT = 'N' or 'E', the LU decomposition is used to factor */ +/* > the matrix A (after equilibration if FACT = 'E') as */ +/* > */ +/* > A = P * L * U, */ +/* > */ +/* > where P is a permutation matrix, L is a unit lower triangular */ +/* > matrix, and U is upper triangular. */ +/* > */ +/* > 3. If some U(i,i)=0, so that U is exactly singular, then the */ +/* > routine returns with INFO = i. Otherwise, the factored form of A */ +/* > is used to estimate the condition number of the matrix A (see */ +/* > argument RCOND). If the reciprocal of the condition number is less */ +/* > than machine precision, the routine still goes on to solve for X */ +/* > and compute error bounds as described below. */ +/* > */ +/* > 4. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), */ +/* > the routine will use iterative refinement to try to get a small */ +/* > error and error bounds. Refinement calculates the residual to at */ +/* > least twice the working precision. */ +/* > */ +/* > 6. If equilibration was used, the matrix X is premultiplied by */ +/* > diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so */ +/* > that it solves the original system before equilibration. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \verbatim */ +/* > Some optional parameters are bundled in the PARAMS array. These */ +/* > settings determine how refinement is performed, but often the */ +/* > defaults are acceptable. If the defaults are acceptable, users */ +/* > can pass NPARAMS = 0 which prevents the source code from accessing */ +/* > the PARAMS argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of the matrix A is */ +/* > supplied on entry, and if not, whether the matrix A should be */ +/* > equilibrated before it is factored. */ +/* > = 'F': On entry, AF and IPIV contain the factored form of A. */ +/* > If EQUED is not 'N', the matrix A has been */ +/* > equilibrated with scaling factors given by R and C. */ +/* > A, AF, and IPIV are not modified. */ +/* > = 'N': The matrix A will be copied to AF and factored. */ +/* > = 'E': The matrix A will be equilibrated if necessary, then */ +/* > copied to AF and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is */ +/* > not 'N', then A must have been equilibrated by the scaling */ +/* > factors in R and/or C. A is not modified if FACT = 'F' or */ +/* > 'N', or if FACT = 'E' and EQUED = 'N' on exit. */ +/* > */ +/* > On exit, if EQUED .ne. 'N', A is scaled as follows: */ +/* > EQUED = 'R': A := diag(R) * A */ +/* > EQUED = 'C': A := A * diag(C) */ +/* > EQUED = 'B': A := diag(R) * A * diag(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AF */ +/* > \verbatim */ +/* > AF is REAL array, dimension (LDAF,N) */ +/* > If FACT = 'F', then AF is an input argument and on entry */ +/* > contains the factors L and U from the factorization */ +/* > A = P*L*U as computed by SGETRF. If EQUED .ne. 'N', then */ +/* > AF is the factored form of the equilibrated matrix A. */ +/* > */ +/* > If FACT = 'N', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then AF is an output argument and on exit */ +/* > returns the factors L and U from the factorization A = P*L*U */ +/* > of the equilibrated matrix A (see the description of A for */ +/* > the form of the equilibrated matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > as computed by SGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the original matrix A. */ +/* > */ +/* > If FACT = 'E', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the factorization A = P*L*U */ +/* > of the equilibrated matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EQUED */ +/* > \verbatim */ +/* > EQUED is CHARACTER*1 */ +/* > Specifies the form of equilibration that was done. */ +/* > = 'N': No equilibration (always true if FACT = 'N'). */ +/* > = 'R': Row equilibration, i.e., A has been premultiplied by */ +/* > diag(R). */ +/* > = 'C': Column equilibration, i.e., A has been postmultiplied */ +/* > by diag(C). */ +/* > = 'B': Both row and column equilibration, i.e., A has been */ +/* > replaced by diag(R) * A * diag(C). */ +/* > EQUED is an input argument if FACT = 'F'; otherwise, it is an */ +/* > output argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] R */ +/* > \verbatim */ +/* > R is REAL array, dimension (N) */ +/* > The row scale factors for A. If EQUED = 'R' or 'B', A is */ +/* > multiplied on the left by diag(R); if EQUED = 'N' or 'C', R */ +/* > is not accessed. R is an input argument if FACT = 'F'; */ +/* > otherwise, R is an output argument. If FACT = 'F' and */ +/* > EQUED = 'R' or 'B', each element of R must be positive. */ +/* > If R is output, each element of R is a power of the radix. */ +/* > If R is input, each element of R should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > The column scale factors for A. If EQUED = 'C' or 'B', A is */ +/* > multiplied on the right by diag(C); if EQUED = 'N' or 'R', C */ +/* > is not accessed. C is an input argument if FACT = 'F'; */ +/* > otherwise, C is an output argument. If FACT = 'F' and */ +/* > EQUED = 'C' or 'B', each element of C must be positive. */ +/* > If C is output, each element of C is a power of the radix. */ +/* > If C is input, each element of C should be a power of the radix */ +/* > to ensure a reliable solution and error estimates. Scaling by */ +/* > powers of the radix does not cause rounding errors unless the */ +/* > result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, */ +/* > if EQUED = 'N', B is not modified; */ +/* > if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by */ +/* > diag(R)*B; */ +/* > if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is */ +/* > overwritten by diag(C)*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (LDX,NRHS) */ +/* > If INFO = 0, the N-by-NRHS solution matrix X to the original */ +/* > system of equations. Note that A and B are modified on exit */ +/* > if EQUED .ne. 'N', and the solution to the equilibrated system is */ +/* > inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or */ +/* > inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RPVGRW */ +/* > \verbatim */ +/* > RPVGRW is REAL */ +/* > Reciprocal pivot growth. On exit, this contains the reciprocal */ +/* > pivot growth factor norm(A)/norm(U). The "f2cmax absolute element" */ +/* > norm is used. If this is much less than 1, then the stability of */ +/* > the LU factorization of the (equilibrated) matrix A could be poor. */ +/* > This also means that the solution X, estimated condition numbers, */ +/* > and error bounds could be unreliable. If factorization fails with */ +/* > 0 for the leading INFO columns of A. In SGESVX, this quantity is */ +/* > returned in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > Componentwise relative backward error. This is the */ +/* > componentwise relative backward error of each solution vector X(j) */ +/* > (i.e., the smallest relative change in any element of A or B that */ +/* > makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_ERR_BNDS */ +/* > \verbatim */ +/* > N_ERR_BNDS is INTEGER */ +/* > Number of error bounds to return for each right hand side */ +/* > and each type (normwise or componentwise). See ERR_BNDS_NORM and */ +/* > ERR_BNDS_COMP below. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NPARAMS */ +/* > \verbatim */ +/* > NPARAMS is INTEGER */ +/* > Specifies the number of parameters set in PARAMS. If <= 0, the */ +/* > PARAMS array is never referenced and default values are used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PARAMS */ +/* > \verbatim */ +/* > PARAMS is REAL array, dimension NPARAMS */ +/* > Specifies algorithm parameters. If an entry is < 0.0, then */ +/* > that entry will be filled with default value used for that */ +/* > parameter. Only positions up to NPARAMS are accessed; defaults */ +/* > are used for higher-numbered parameters. */ +/* > */ +/* > PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative */ +/* > refinement or not. */ +/* > Default: 1.0 */ +/* > = 0.0: No refinement is performed, and no error bounds are */ +/* > computed. */ +/* > = 1.0: Use the double-precision refinement algorithm, */ +/* > possibly with doubled-single computations if the */ +/* > compilation environment does not support DOUBLE */ +/* > PRECISION. */ +/* > (other values are reserved for future use) */ +/* > */ +/* > PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual */ +/* > computations allowed for refinement. */ +/* > Default: 10 */ +/* > Aggressive: Set to 100 to permit convergence using approximate */ +/* > factorizations or factorizations other than LU. If */ +/* > the factorization uses a technique other than */ +/* > Gaussian elimination, the guarantees in */ +/* > err_bnds_norm and err_bnds_comp may no longer be */ +/* > trustworthy. */ +/* > */ +/* > PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code */ +/* > will attempt to find a solution with small componentwise */ +/* > relative error in the double-precision algorithm. Positive */ +/* > is true, 0.0 is false. */ +/* > Default: 1.0 (attempt componentwise convergence) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. The solution to every right-hand side is */ +/* > guaranteed. */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly singular, so */ +/* > the solution and error bounds could not be computed. RCOND = 0 */ +/* > is returned. */ +/* > = N+J: The solution corresponding to the Jth right-hand side is */ +/* > not guaranteed. The solutions corresponding to other right- */ +/* > hand sides K with K > J may not be guaranteed as well, but */ +/* > only the first such right-hand side is reported. If a small */ +/* > componentwise error is not requested (PARAMS(3) = 0.0) then */ +/* > the Jth right-hand side is the first with a normwise error */ +/* > bound that is not guaranteed (the smallest J such */ +/* > that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) */ +/* > the Jth right-hand side is the first with either a normwise or */ +/* > componentwise error bound that is not guaranteed (the smallest */ +/* > J such that either ERR_BNDS_NORM(J,1) = 0.0 or */ +/* > ERR_BNDS_COMP(J,1) = 0.0). See the definition of */ +/* > ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information */ +/* > about all of the right-hand sides check ERR_BNDS_NORM or */ +/* > ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup realGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgesvxx_(char *fact, char *trans, integer *n, integer * + nrhs, real *a, integer *lda, real *af, integer *ldaf, integer *ipiv, + char *equed, real *r__, real *c__, real *b, integer *ldb, real *x, + integer *ldx, real *rcond, real *rpvgrw, real *berr, integer * + n_err_bnds__, real *err_bnds_norm__, real *err_bnds_comp__, integer * + nparams, real *params, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, + x_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1; + real r__1, r__2; + + /* Local variables */ + real amax; + extern real sla_gerpvgrw_(integer *, integer *, real *, integer *, real * + , integer *); + integer j; + extern logical lsame_(char *, char *); + real rcmin, rcmax; + logical equil; + real colcnd; + extern real slamch_(char *); + logical nofact; + extern /* Subroutine */ int slaqge_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, char *), + xerbla_(char *, integer *, ftnlen); + real bignum; + integer infequ; + logical colequ; + extern /* Subroutine */ int sgetrf_(integer *, integer *, real *, integer + *, integer *, integer *); + real rowcnd; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + logical notran; + extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, + integer *, integer *, real *, integer *, integer *); + real smlnum; + logical rowequ; + extern /* Subroutine */ int slascl2_(integer *, integer *, real *, real *, + integer *), sgeequb_(integer *, integer *, real *, integer *, + real *, real *, real *, real *, real *, integer *), sgerfsx_(char + *, char *, integer *, integer *, real *, integer *, real *, + integer *, integer *, real *, real *, real *, integer *, real *, + integer *, real *, real *, integer *, real *, real *, integer *, + real *, real *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ================================================================== */ + + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --r__; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --berr; + --params; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + equil = lsame_(fact, "E"); + notran = lsame_(trans, "N"); + smlnum = slamch_("Safe minimum"); + bignum = 1.f / smlnum; + if (nofact || equil) { + *(unsigned char *)equed = 'N'; + rowequ = FALSE_; + colequ = FALSE_; + } else { + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* Default is failure. If an input parameter is wrong or */ +/* factorization fails, make everything look horrible. Only the */ +/* pivot growth is set here, the rest is initialized in SGERFSX. */ + + *rpvgrw = 0.f; + +/* Test the input parameters. PARAMS is not tested until SGERFSX. */ + + if (! nofact && ! equil && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*n)) { + *info = -6; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -8; + } else if (lsame_(fact, "F") && ! (rowequ || colequ + || lsame_(equed, "N"))) { + *info = -10; + } else { + if (rowequ) { + rcmin = bignum; + rcmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + r__1 = rcmin, r__2 = r__[j]; + rcmin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = rcmax, r__2 = r__[j]; + rcmax = f2cmax(r__1,r__2); +/* L10: */ + } + if (rcmin <= 0.f) { + *info = -11; + } else if (*n > 0) { + rowcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + rowcnd = 1.f; + } + } + if (colequ && *info == 0) { + rcmin = bignum; + rcmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + r__1 = rcmin, r__2 = c__[j]; + rcmin = f2cmin(r__1,r__2); +/* Computing MAX */ + r__1 = rcmax, r__2 = c__[j]; + rcmax = f2cmax(r__1,r__2); +/* L20: */ + } + if (rcmin <= 0.f) { + *info = -12; + } else if (*n > 0) { + colcnd = f2cmax(rcmin,smlnum) / f2cmin(rcmax,bignum); + } else { + colcnd = 1.f; + } + } + if (*info == 0) { + if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESVXX", &i__1, (ftnlen)7); + return 0; + } + + if (equil) { + +/* Compute row and column scalings to equilibrate the matrix A. */ + + sgeequb_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, &colcnd, + &amax, &infequ); + if (infequ == 0) { + +/* Equilibrate the matrix. */ + + slaqge_(n, n, &a[a_offset], lda, &r__[1], &c__[1], &rowcnd, & + colcnd, &amax, equed); + rowequ = lsame_(equed, "R") || lsame_(equed, + "B"); + colequ = lsame_(equed, "C") || lsame_(equed, + "B"); + } + +/* If the scaling factors are not applied, set them to 1.0. */ + + if (! rowequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + r__[j] = 1.f; + } + } + if (! colequ) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j] = 1.f; + } + } + } + +/* Scale the right-hand side. */ + + if (notran) { + if (rowequ) { + slascl2_(n, nrhs, &r__[1], &b[b_offset], ldb); + } + } else { + if (colequ) { + slascl2_(n, nrhs, &c__[1], &b[b_offset], ldb); + } + } + + if (nofact || equil) { + +/* Compute the LU factorization of A. */ + + slacpy_("Full", n, n, &a[a_offset], lda, &af[af_offset], ldaf); + sgetrf_(n, n, &af[af_offset], ldaf, &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + +/* Pivot in column INFO is exactly 0 */ +/* Compute the reciprocal pivot growth factor of the */ +/* leading rank-deficient INFO columns of A. */ + + *rpvgrw = sla_gerpvgrw_(n, info, &a[a_offset], lda, &af[ + af_offset], ldaf); + return 0; + } + } + +/* Compute the reciprocal pivot growth factor RPVGRW. */ + + *rpvgrw = sla_gerpvgrw_(n, n, &a[a_offset], lda, &af[af_offset], ldaf); + +/* Compute the solution matrix X. */ + + slacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + sgetrs_(trans, n, nrhs, &af[af_offset], ldaf, &ipiv[1], &x[x_offset], ldx, + info); + +/* Use iterative refinement to improve the computed solution and */ +/* compute error bounds and backward error estimates for it. */ + + sgerfsx_(trans, equed, n, nrhs, &a[a_offset], lda, &af[af_offset], ldaf, & + ipiv[1], &r__[1], &c__[1], &b[b_offset], ldb, &x[x_offset], ldx, + rcond, &berr[1], n_err_bnds__, &err_bnds_norm__[ + err_bnds_norm_offset], &err_bnds_comp__[err_bnds_comp_offset], + nparams, ¶ms[1], &work[1], &iwork[1], info); + +/* Scale solutions. */ + + if (colequ && notran) { + slascl2_(n, nrhs, &c__[1], &x[x_offset], ldx); + } else if (rowequ && ! notran) { + slascl2_(n, nrhs, &r__[1], &x[x_offset], ldx); + } + + return 0; + +/* End of SGESVXX */ +} /* sgesvxx_ */ + diff --git a/lapack-netlib/SRC/sgetc2.c b/lapack-netlib/SRC/sgetc2.c new file mode 100644 index 000000000..28aab8829 --- /dev/null +++ b/lapack-netlib/SRC/sgetc2.c @@ -0,0 +1,645 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGETC2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) */ + +/* INTEGER INFO, LDA, N */ +/* INTEGER IPIV( * ), JPIV( * ) */ +/* REAL A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETC2 computes an LU factorization with complete pivoting of the */ +/* > n-by-n matrix A. The factorization has the form A = P * L * U * Q, */ +/* > where P and Q are permutation matrices, L is lower triangular with */ +/* > unit diagonal elements and U is upper triangular. */ +/* > */ +/* > This is the Level 2 BLAS algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the n-by-n matrix A to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U*Q; the unit diagonal elements of L are not stored. */ +/* > If U(k, k) appears to be less than SMIN, U(k, k) is given the */ +/* > value of SMIN, i.e., giving a nonsingular perturbed system. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension(N). */ +/* > The pivot indices; for 1 <= i <= N, row i of the */ +/* > matrix has been interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] JPIV */ +/* > \verbatim */ +/* > JPIV is INTEGER array, dimension(N). */ +/* > The pivot indices; for 1 <= j <= N, column j of the */ +/* > matrix has been interchanged with column JPIV(j). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > > 0: if INFO = k, U(k, k) is likely to produce overflow if */ +/* > we try to solve for x in Ax = b. So U is perturbed to */ +/* > avoid the overflow. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ +/* > Umea University, S-901 87 Umea, Sweden. */ + +/* ===================================================================== */ +/* Subroutine */ int sgetc2_(integer *n, real *a, integer *lda, integer *ipiv, + integer *jpiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + real smin, xmax; + integer i__, j; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slabad_(real *, real *); + integer ip, jp; + extern real slamch_(char *); + real bignum, smlnum, eps; + integer ipv, jpv; + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --jpiv; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = slamch_("P"); + smlnum = slamch_("S") / eps; + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Handle the case N=1 by itself */ + + if (*n == 1) { + ipiv[1] = 1; + jpiv[1] = 1; + if ((r__1 = a[a_dim1 + 1], abs(r__1)) < smlnum) { + *info = 1; + a[a_dim1 + 1] = smlnum; + } + return 0; + } + +/* Factorize A using complete pivoting. */ +/* Set pivots less than SMIN to SMIN. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Find f2cmax element in matrix A */ + + xmax = 0.f; + i__2 = *n; + for (ip = i__; ip <= i__2; ++ip) { + i__3 = *n; + for (jp = i__; jp <= i__3; ++jp) { + if ((r__1 = a[ip + jp * a_dim1], abs(r__1)) >= xmax) { + xmax = (r__1 = a[ip + jp * a_dim1], abs(r__1)); + ipv = ip; + jpv = jp; + } +/* L10: */ + } +/* L20: */ + } + if (i__ == 1) { +/* Computing MAX */ + r__1 = eps * xmax; + smin = f2cmax(r__1,smlnum); + } + +/* Swap rows */ + + if (ipv != i__) { + sswap_(n, &a[ipv + a_dim1], lda, &a[i__ + a_dim1], lda); + } + ipiv[i__] = ipv; + +/* Swap columns */ + + if (jpv != i__) { + sswap_(n, &a[jpv * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], & + c__1); + } + jpiv[i__] = jpv; + +/* Check for singularity */ + + if ((r__1 = a[i__ + i__ * a_dim1], abs(r__1)) < smin) { + *info = i__; + a[i__ + i__ * a_dim1] = smin; + } + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + a[j + i__ * a_dim1] /= a[i__ + i__ * a_dim1]; +/* L30: */ + } + i__2 = *n - i__; + i__3 = *n - i__; + sger_(&i__2, &i__3, &c_b10, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ + + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); +/* L40: */ + } + + if ((r__1 = a[*n + *n * a_dim1], abs(r__1)) < smin) { + *info = *n; + a[*n + *n * a_dim1] = smin; + } + +/* Set last pivots to N */ + + ipiv[*n] = *n; + jpiv[*n] = *n; + + return 0; + +/* End of SGETC2 */ + +} /* sgetc2_ */ + diff --git a/lapack-netlib/SRC/sgetf2.c b/lapack-netlib/SRC/sgetf2.c new file mode 100644 index 000000000..1bb1cc839 --- /dev/null +++ b/lapack-netlib/SRC/sgetf2.c @@ -0,0 +1,618 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row + interchanges (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGETF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* REAL A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETF2 computes an LU factorization of a general m-by-n matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 2 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the m by n matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + integer i__, j; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + real sfmin; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *); + integer jp; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer isamax_(integer *, real *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETF2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + + i__1 = f2cmin(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.f) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + if ((r__1 = a[j + j * a_dim1], abs(r__1)) >= sfmin) { + i__2 = *m - j; + r__1 = 1.f / a[j + j * a_dim1]; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < f2cmin(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + sger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + ( + j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of SGETF2 */ + +} /* sgetf2_ */ + diff --git a/lapack-netlib/SRC/sgetrf.c b/lapack-netlib/SRC/sgetrf.c new file mode 100644 index 000000000..f4f7a4977 --- /dev/null +++ b/lapack-netlib/SRC/sgetrf.c @@ -0,0 +1,643 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGETRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* REAL A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETRF computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 3 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *); + integer jb, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *), sgetrf2_(integer *, integer * + , real *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= f2cmin(*m,*n)) { + +/* Use unblocked code. */ + + sgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = f2cmin(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = f2cmin(*m,*n) - j + 1; + jb = f2cmin(i__3,nb); + +/* Factor diagonal and subdiagonal blocks and test for exact */ +/* singularity. */ + + i__3 = *m - j + 1; + sgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = f2cmin(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of SGETRF */ + +} /* sgetrf_ */ + diff --git a/lapack-netlib/SRC/sgetrf2.c b/lapack-netlib/SRC/sgetrf2.c new file mode 100644 index 000000000..77c7ecacc --- /dev/null +++ b/lapack-netlib/SRC/sgetrf2.c @@ -0,0 +1,681 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETRF2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETRF2( M, N, A, LDA, IPIV, INFO ) */ + +/* INTEGER INFO, LDA, M, N */ +/* INTEGER IPIV( * ) */ +/* REAL A( LDA, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETRF2 computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the recursive version of the algorithm. It divides */ +/* > the matrix into four submatrices: */ +/* > */ +/* > [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 */ +/* > A = [ -----|----- ] with n1 = f2cmin(m,n)/2 */ +/* > [ A21 | A22 ] n2 = n-n1 */ +/* > */ +/* > [ A11 ] */ +/* > The subroutine calls itself to factor [ --- ], */ +/* > [ A12 ] */ +/* > [ A12 ] */ +/* > do the swaps on [ --- ], solve A12, update A22, */ +/* > [ A22 ] */ +/* > */ +/* > then calls itself to factor A22 and do the swaps on A21. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (f2cmin(M,N)) */ +/* > The pivot indices; for 1 <= i <= f2cmin(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgetrf2_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + real temp; + integer i__, iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemm_(char *, char *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + real sfmin; + integer n1, n2; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ); + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRF2", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (*m == 1) { + +/* Use unblocked code for one row case */ +/* Just need to handle IPIV and INFO */ + + ipiv[1] = 1; + if (a[a_dim1 + 1] == 0.f) { + *info = 1; + } + + } else if (*n == 1) { + +/* Use unblocked code for one column case */ + + +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + +/* Find pivot and test for singularity */ + + i__ = isamax_(m, &a[a_dim1 + 1], &c__1); + ipiv[1] = i__; + if (a[i__ + a_dim1] != 0.f) { + +/* Apply the interchange */ + + if (i__ != 1) { + temp = a[a_dim1 + 1]; + a[a_dim1 + 1] = a[i__ + a_dim1]; + a[i__ + a_dim1] = temp; + } + +/* Compute elements 2:M of the column */ + + if ((r__1 = a[a_dim1 + 1], abs(r__1)) >= sfmin) { + i__1 = *m - 1; + r__1 = 1.f / a[a_dim1 + 1]; + sscal_(&i__1, &r__1, &a[a_dim1 + 2], &c__1); + } else { + i__1 = *m - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + 1 + a_dim1] /= a[a_dim1 + 1]; +/* L10: */ + } + } + + } else { + *info = 1; + } + + } else { + +/* Use recursive code */ + + n1 = f2cmin(*m,*n) / 2; + n2 = *n - n1; + +/* [ A11 ] */ +/* Factor [ --- ] */ +/* [ A21 ] */ + + sgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + +/* [ A12 ] */ +/* Apply interchanges to [ --- ] */ +/* [ A22 ] */ + + slaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], & + c__1); + +/* Solve A12 */ + + strsm_("L", "L", "N", "U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[( + n1 + 1) * a_dim1 + 1], lda); + +/* Update A22 */ + + i__1 = *m - n1; + sgemm_("N", "N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda, & + a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * + a_dim1], lda); + +/* Factor A22 */ + + i__1 = *m - n1; + sgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + + 1], &iinfo); + +/* Adjust INFO and the pivot indices */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + n1; + } + i__1 = f2cmin(*m,*n); + for (i__ = n1 + 1; i__ <= i__1; ++i__) { + ipiv[i__] += n1; +/* L20: */ + } + +/* Apply interchanges to A21 */ + + i__1 = n1 + 1; + i__2 = f2cmin(*m,*n); + slaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1); + + } + return 0; + +/* End of SGETRF2 */ + +} /* sgetrf2_ */ + diff --git a/lapack-netlib/SRC/sgetri.c b/lapack-netlib/SRC/sgetri.c new file mode 100644 index 000000000..003d72975 --- /dev/null +++ b/lapack-netlib/SRC/sgetri.c @@ -0,0 +1,690 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETRI */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGETRI + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LWORK, N */ +/* INTEGER IPIV( * ) */ +/* REAL A( LDA, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETRI computes the inverse of a matrix using the LU factorization */ +/* > computed by SGETRF. */ +/* > */ +/* > This method inverts U and then computes inv(A) by solving the system */ +/* > inv(A)*L = inv(U) for inv(A). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the factors L and U from the factorization */ +/* > A = P*L*U as computed by SGETRF. */ +/* > On exit, if INFO = 0, the inverse of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from SGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > For optimal performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero; the matrix is */ +/* > singular and its inverse could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgetri_(integer *n, real *a, integer *lda, integer *ipiv, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, nbmin; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), sgemv_(char *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), sswap_(integer *, real *, integer *, + real *, integer *), strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ); + integer jb, nb, jj, jp, nn; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + extern /* Subroutine */ int strtri_(char *, char *, integer *, real *, + integer *, integer *); + integer iws; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGETRI", " ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda < f2cmax(1,*n)) { + *info = -3; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRI", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form inv(U). If INFO > 0 from STRTRI, then U is singular, */ +/* and the inverse is not computed. */ + + strtri_("Upper", "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { +/* Computing MAX */ + i__1 = ldwork * nb; + iws = f2cmax(i__1,1); + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGETRI", " ", n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + } + } else { + iws = *n; + } + +/* Solve the equation inv(A)*L = inv(U) for inv(A). */ + + if (nb < nbmin || nb >= *n) { + +/* Use unblocked code. */ + + for (j = *n; j >= 1; --j) { + +/* Copy current column of L to WORK and replace with zeros. */ + + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + work[i__] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = 0.f; +/* L10: */ + } + +/* Compute current column of inv(A). */ + + if (j < *n) { + i__1 = *n - j; + sgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + + 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + + 1], &c__1); + } +/* L20: */ + } + } else { + +/* Use blocked code. */ + + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *n - j + 1; + jb = f2cmin(i__2,i__3); + +/* Copy current block column of L to WORK and replace with */ +/* zeros. */ + + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1]; + a[i__ + jj * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + +/* Compute current block column of inv(A). */ + + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + sgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, + &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], & + ldwork, &c_b22, &a[j * a_dim1 + 1], lda); + } + strsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, & + work[j], &ldwork, &a[j * a_dim1 + 1], lda); +/* L50: */ + } + } + +/* Apply column interchanges. */ + + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + sswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } +/* L60: */ + } + + work[1] = (real) iws; + return 0; + +/* End of SGETRI */ + +} /* sgetri_ */ + diff --git a/lapack-netlib/SRC/sgetrs.c b/lapack-netlib/SRC/sgetrs.c new file mode 100644 index 000000000..b27fbabba --- /dev/null +++ b/lapack-netlib/SRC/sgetrs.c @@ -0,0 +1,619 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGETRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* REAL A( LDA, * ), B( LDB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETRS solves a system of linear equations */ +/* > A * X = B or A**T * X = B */ +/* > with a general N-by-N matrix A using the LU factorization computed */ +/* > by SGETRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T* X = B (Transpose) */ +/* > = 'C': A**T* X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by SGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from SGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *, ftnlen); + logical notran; + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* Solve A * X = B. */ + +/* Apply row interchanges to the right hand sides. */ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* Solve A**T * X = B. */ + +/* Solve U**T *X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L**T *X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of SGETRS */ + +} /* sgetrs_ */ + diff --git a/lapack-netlib/SRC/sgetsls.c b/lapack-netlib/SRC/sgetsls.c new file mode 100644 index 000000000..e2bfef09e --- /dev/null +++ b/lapack-netlib/SRC/sgetsls.c @@ -0,0 +1,929 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETSLS */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, */ +/* $ WORK, LWORK, INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS */ +/* REAL A( LDA, * ), B( LDB, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETSLS solves overdetermined or underdetermined real linear systems */ +/* > involving an M-by-N matrix A, using a tall skinny QR or short wide LQ */ +/* > factorization of A. It is assumed that A has full rank. */ +/* > */ +/* > */ +/* > */ +/* > The following options are provided: */ +/* > */ +/* > 1. If TRANS = 'N' and m >= n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A*X ||. */ +/* > */ +/* > 2. If TRANS = 'N' and m < n: find the minimum norm solution of */ +/* > an underdetermined system A * X = B. */ +/* > */ +/* > 3. If TRANS = 'T' and m >= n: find the minimum norm solution of */ +/* > an undetermined system A**T * X = B. */ +/* > */ +/* > 4. If TRANS = 'T' and m < n: find the least squares solution of */ +/* > an overdetermined system, i.e., solve the least squares problem */ +/* > minimize || B - A**T * X ||. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': the linear system involves A; */ +/* > = 'T': the linear system involves A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of */ +/* > columns of the matrices B and X. NRHS >=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > A is overwritten by details of its QR or LQ */ +/* > factorization as returned by SGEQR or SGELQ. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the matrix B of right hand side vectors, stored */ +/* > columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */ +/* > if TRANS = 'T'. */ +/* > On exit, if INFO = 0, B is overwritten by the solution */ +/* > vectors, stored columnwise: */ +/* > if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */ +/* > squares solution vectors. */ +/* > if TRANS = 'N' and m < n, rows 1 to N of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m >= n, rows 1 to M of B contain the */ +/* > minimum norm solution vectors; */ +/* > if TRANS = 'T' and m < n, rows 1 to M of B contain the */ +/* > least squares solution vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= MAX(1,M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) contains optimal (or either minimal */ +/* > or optimal, if query was assumed) LWORK. */ +/* > See LWORK for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If LWORK = -1 or -2, then a workspace query is assumed. */ +/* > If LWORK = -1, the routine calculates optimal size of WORK for the */ +/* > optimal performance and returns this value in WORK(1). */ +/* > If LWORK = -2, the routine calculates minimal size of WORK and */ +/* > returns this value in WORK(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the i-th diagonal element of the */ +/* > triangular factor of A is zero, so that A does not have */ +/* > full rank; the least squares solution could not be */ +/* > computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgetsls_(char *trans, integer *m, integer *n, integer * + nrhs, real *a, integer *lda, real *b, integer *ldb, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + real anrm, bnrm; + logical tran; + integer brow, tszm, tszo, info2, i__, j, iascl, ibscl; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgelq_(integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, integer *); + integer minmn, maxmn; + extern /* Subroutine */ int sgeqr_(integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, integer *); + real workq[1]; + extern /* Subroutine */ int slabad_(real *, real *); + real tq[5]; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer scllen; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), sgemlq_(char *, char *, integer *, integer *, integer *, + real *, integer *, real *, integer *, real *, integer *, real *, + integer *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *), sgemqr_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *, + integer *); + real smlnum; + integer wsizem, wsizeo; + logical lquery; + integer lw1, lw2; + extern /* Subroutine */ int strtrs_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, integer *, integer *); + integer mnk, lwm, lwo; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --work; + + /* Function Body */ + *info = 0; + minmn = f2cmin(*m,*n); + maxmn = f2cmax(*m,*n); + mnk = f2cmax(minmn,*nrhs); + tran = lsame_(trans, "T"); + + lquery = *lwork == -1 || *lwork == -2; + if (! (lsame_(trans, "N") || lsame_(trans, "T"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*lda < f2cmax(1,*m)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m); + if (*ldb < f2cmax(i__1,*n)) { + *info = -8; + } + } + + if (*info == 0) { + +/* Determine the block size and minimum LWORK */ + + if (*m >= *n) { + sgeqr_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2); + tszo = (integer) tq[0]; + lwo = (integer) workq[0]; + sgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszo, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwo, i__2 = (integer) workq[0]; + lwo = f2cmax(i__1,i__2); + sgeqr_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2); + tszm = (integer) tq[0]; + lwm = (integer) workq[0]; + sgemqr_("L", trans, m, nrhs, n, &a[a_offset], lda, tq, &tszm, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwm, i__2 = (integer) workq[0]; + lwm = f2cmax(i__1,i__2); + wsizeo = tszo + lwo; + wsizem = tszm + lwm; + } else { + sgelq_(m, n, &a[a_offset], lda, tq, &c_n1, workq, &c_n1, &info2); + tszo = (integer) tq[0]; + lwo = (integer) workq[0]; + sgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszo, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwo, i__2 = (integer) workq[0]; + lwo = f2cmax(i__1,i__2); + sgelq_(m, n, &a[a_offset], lda, tq, &c_n2, workq, &c_n2, &info2); + tszm = (integer) tq[0]; + lwm = (integer) workq[0]; + sgemlq_("L", trans, n, nrhs, m, &a[a_offset], lda, tq, &tszm, &b[ + b_offset], ldb, workq, &c_n1, &info2); +/* Computing MAX */ + i__1 = lwm, i__2 = (integer) workq[0]; + lwm = f2cmax(i__1,i__2); + wsizeo = tszo + lwo; + wsizem = tszm + lwm; + } + + if (*lwork < wsizem && ! lquery) { + *info = -10; + } + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETSLS", &i__1, (ftnlen)7); + work[1] = (real) wsizeo; + return 0; + } + if (lquery) { + if (*lwork == -1) { + work[1] = (real) wsizeo; + } + if (*lwork == -2) { + work[1] = (real) wsizem; + } + return 0; + } + if (*lwork < wsizeo) { + lw1 = tszm; + lw2 = lwm; + } else { + lw1 = tszo; + lw2 = lwo; + } + +/* Quick return if possible */ + +/* Computing MIN */ + i__1 = f2cmin(*m,*n); + if (f2cmin(i__1,*nrhs) == 0) { + i__1 = f2cmax(*m,*n); + slaset_("FULL", &i__1, nrhs, &c_b23, &c_b23, &b[b_offset], ldb); + return 0; + } + +/* Get machine parameters */ + + smlnum = slamch_("S") / slamch_("P"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + +/* Scale A, B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0.f && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.f) { + +/* Matrix all zero. Return zero solution. */ + + slaset_("F", &maxmn, nrhs, &c_b23, &c_b23, &b[b_offset], ldb); + goto L50; + } + + brow = *m; + if (tran) { + brow = *n; + } + bnrm = slange_("M", &brow, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0.f && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + slascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset], + ldb, info); + ibscl = 2; + } + + if (*m >= *n) { + +/* compute QR factorization of A */ + + sgeqr_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, + info); + if (! tran) { + +/* Least-Squares Problem f2cmin || A * X - B || */ + +/* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) */ + + sgemqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */ + + strtrs_("U", "N", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + if (*info > 0) { + return 0; + } + scllen = *n; + } else { + +/* Overdetermined system of equations A**T * X = B */ + +/* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) */ + + strtrs_("U", "T", "N", n, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(N+1:M,1:NRHS) = ZERO */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *n + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + +/* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */ + + sgemqr_("L", "N", m, nrhs, n, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + + scllen = *m; + + } + + } else { + +/* Compute LQ factorization of A */ + + sgelq_(m, n, &a[a_offset], lda, &work[lw2 + 1], &lw1, &work[1], &lw2, + info); + +/* workspace at least M, optimally M*NB. */ + + if (! tran) { + +/* underdetermined system of equations A * X = B */ + +/* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */ + + strtrs_("L", "N", "N", m, nrhs, &a[a_offset], lda, &b[b_offset], + ldb, info); + + if (*info > 0) { + return 0; + } + +/* B(M+1:N,1:NRHS) = 0 */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = *m + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + +/* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) */ + + sgemlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + + scllen = *n; + + } else { + +/* overdetermined system f2cmin || A**T * X - B || */ + +/* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */ + + sgemlq_("L", "N", n, nrhs, m, &a[a_offset], lda, &work[lw2 + 1], & + lw1, &b[b_offset], ldb, &work[1], &lw2, info); + +/* workspace at least NRHS, optimally NRHS*NB */ + +/* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) */ + + strtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset], + lda, &b[b_offset], ldb, info); + + if (*info > 0) { + return 0; + } + + scllen = *m; + + } + + } + +/* Undo scaling */ + + if (iascl == 1) { + slascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (iascl == 2) { + slascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + if (ibscl == 1) { + slascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } else if (ibscl == 2) { + slascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset] + , ldb, info); + } + +L50: + work[1] = (real) (tszo + lwo); + return 0; + +/* End of SGETSLS */ + +} /* sgetsls_ */ + diff --git a/lapack-netlib/SRC/sgetsqrhrt.c b/lapack-netlib/SRC/sgetsqrhrt.c new file mode 100644 index 000000000..29095f099 --- /dev/null +++ b/lapack-netlib/SRC/sgetsqrhrt.c @@ -0,0 +1,765 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGETSQRHRT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGETSQRHRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGETSQRHRT( M, N, MB1, NB1, NB2, A, LDA, T, LDT, WORK, */ +/* $ LWORK, INFO ) */ +/* IMPLICIT NONE */ + +/* INTEGER INFO, LDA, LDT, LWORK, M, N, NB1, NB2, MB1 */ +/* REAL A( LDA, * ), T( LDT, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGETSQRHRT computes a NB2-sized column blocked QR-factorization */ +/* > of a complex M-by-N matrix A with M >= N, */ +/* > */ +/* > A = Q * R. */ +/* > */ +/* > The routine uses internally a NB1-sized column blocked and MB1-sized */ +/* > row blocked TSQR-factorization and perfors the reconstruction */ +/* > of the Householder vectors from the TSQR output. The routine also */ +/* > converts the R_tsqr factor from the TSQR-factorization output into */ +/* > the R factor that corresponds to the Householder QR-factorization, */ +/* > */ +/* > A = Q_tsqr * R_tsqr = Q * R. */ +/* > */ +/* > The output Q and R factors are stored in the same format as in SGEQRT */ +/* > (Q is in blocked compact WY-representation). See the documentation */ +/* > of SGEQRT for more details on the format. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MB1 */ +/* > \verbatim */ +/* > MB1 is INTEGER */ +/* > The row block size to be used in the blocked TSQR. */ +/* > MB1 > N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB1 */ +/* > \verbatim */ +/* > NB1 is INTEGER */ +/* > The column block size to be used in the blocked TSQR. */ +/* > N >= NB1 >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB2 */ +/* > \verbatim */ +/* > NB2 is INTEGER */ +/* > The block size to be used in the blocked QR that is */ +/* > output. NB2 >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > */ +/* > On entry: an M-by-N matrix A. */ +/* > */ +/* > On exit: */ +/* > a) the elements on and above the diagonal */ +/* > of the array contain the N-by-N upper-triangular */ +/* > matrix R corresponding to the Householder QR; */ +/* > b) the elements below the diagonal represent Q by */ +/* > the columns of blocked V (compact WY-representation). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT,N)) */ +/* > The upper triangular block reflectors stored in compact form */ +/* > as a sequence of upper triangular blocks. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= NB2. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > (workspace) REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX( LWT + LW1, MAX( LWT+N*N+LW2, LWT+N*N+N ) ), */ +/* > where */ +/* > NUM_ALL_ROW_BLOCKS = CEIL((M-N)/(MB1-N)), */ +/* > NB1LOCAL = MIN(NB1,N). */ +/* > LWT = NUM_ALL_ROW_BLOCKS * N * NB1LOCAL, */ +/* > LW1 = NB1LOCAL * N, */ +/* > LW2 = NB1LOCAL * MAX( NB1LOCAL, ( N - NB1LOCAL ) ), */ +/* > If LWORK = -1, then a workspace query is assumed. */ +/* > The routine only calculates the optimal size of the WORK */ +/* > array, returns this value as the first entry of the WORK */ +/* > array, and no error message related to LWORK is issued */ +/* > by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup singleOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > November 2020, Igor Kozachenko, */ +/* > Computer Science Division, */ +/* > University of California, Berkeley */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgetsqrhrt_(integer *m, integer *n, integer *mb1, + integer *nb1, integer *nb2, real *a, integer *lda, real *t, integer * + ldt, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3; + + /* Local variables */ + integer ldwt, lworkopt, i__, j, iinfo; + extern /* Subroutine */ int sorgtsqr_row_(integer *, integer *, integer * + , integer *, real *, integer *, real *, integer *, real *, + integer *, integer *), scopy_(integer *, real *, integer *, real * + , integer *), sorhr_col_(integer *, integer *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, + integer *, ftnlen); + logical lquery; + integer lw1, lw2, num_all_row_blocks__, lwt; + extern /* Subroutine */ int slatsqr_(integer *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , integer *); + integer nb1local, nb2local; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *m < *n) { + *info = -2; + } else if (*mb1 <= *n) { + *info = -3; + } else if (*nb1 < 1) { + *info = -4; + } else if (*nb2 < 1) { + *info = -5; + } else if (*lda < f2cmax(1,*m)) { + *info = -7; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = f2cmin(*nb2,*n); + if (*ldt < f2cmax(i__1,i__2)) { + *info = -9; + } else { + +/* Test the input LWORK for the dimension of the array WORK. */ +/* This workspace is used to store array: */ +/* a) Matrix T and WORK for SLATSQR; */ +/* b) N-by-N upper-triangular factor R_tsqr; */ +/* c) Matrix T and array WORK for SORGTSQR_ROW; */ +/* d) Diagonal D for SORHR_COL. */ + + if (*lwork < *n * *n + 1 && ! lquery) { + *info = -11; + } else { + +/* Set block size for column blocks */ + + nb1local = f2cmin(*nb1,*n); + +/* Computing MAX */ + r__3 = (real) (*m - *n) / (real) (*mb1 - *n) + .5f; + r__1 = 1.f, r__2 = r_int(&r__3); + num_all_row_blocks__ = f2cmax(r__1,r__2); + +/* Length and leading dimension of WORK array to place */ +/* T array in TSQR. */ + + lwt = num_all_row_blocks__ * *n * nb1local; + ldwt = nb1local; + +/* Length of TSQR work array */ + + lw1 = nb1local * *n; + +/* Length of SORGTSQR_ROW work array. */ + +/* Computing MAX */ + i__1 = nb1local, i__2 = *n - nb1local; + lw2 = nb1local * f2cmax(i__1,i__2); + +/* Computing MAX */ +/* Computing MAX */ + i__3 = lwt + *n * *n + lw2, i__4 = lwt + *n * *n + *n; + i__1 = lwt + lw1, i__2 = f2cmax(i__3,i__4); + lworkopt = f2cmax(i__1,i__2); + + if (*lwork < f2cmax(1,lworkopt) && ! lquery) { + *info = -11; + } + + } + } + } + +/* Handle error in the input parameters and return workspace query. */ + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETSQRHRT", &i__1, (ftnlen)10); + return 0; + } else if (lquery) { + work[1] = (real) lworkopt; + return 0; + } + +/* Quick return if possible */ + + if (f2cmin(*m,*n) == 0) { + work[1] = (real) lworkopt; + return 0; + } + + nb2local = f2cmin(*nb2,*n); + + +/* (1) Perform TSQR-factorization of the M-by-N matrix A. */ + + slatsqr_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, &work[ + lwt + 1], &lw1, &iinfo); + +/* (2) Copy the factor R_tsqr stored in the upper-triangular part */ +/* of A into the square matrix in the work array */ +/* WORK(LWT+1:LWT+N*N) column-by-column. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + scopy_(&j, &a[j * a_dim1 + 1], &c__1, &work[lwt + *n * (j - 1) + 1], & + c__1); + } + +/* (3) Generate a M-by-N matrix Q with orthonormal columns from */ +/* the result stored below the diagonal in the array A in place. */ + + sorgtsqr_row_(m, n, mb1, &nb1local, &a[a_offset], lda, &work[1], &ldwt, & + work[lwt + *n * *n + 1], &lw2, &iinfo); + +/* (4) Perform the reconstruction of Householder vectors from */ +/* the matrix Q (stored in A) in place. */ + + sorhr_col_(m, n, &nb2local, &a[a_offset], lda, &t[t_offset], ldt, &work[ + lwt + *n * *n + 1], &iinfo); + +/* (5) Copy the factor R_tsqr stored in the square matrix in the */ +/* work array WORK(LWT+1:LWT+N*N) into the upper-triangular */ +/* part of A. */ + +/* (6) Compute from R_tsqr the factor R_hr corresponding to */ +/* the reconstructed Householder vectors, i.e. R_hr = S * R_tsqr. */ +/* This multiplication by the sign matrix S on the left means */ +/* changing the sign of I-th row of the matrix R_tsqr according */ +/* to sign of the I-th diagonal element DIAG(I) of the matrix S. */ +/* DIAG is stored in WORK( LWT+N*N+1 ) from the SORHR_COL output. */ + +/* (5) and (6) can be combined in a single loop, so the rows in A */ +/* are accessed only once. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (work[lwt + *n * *n + i__] == -1.f) { + i__2 = *n; + for (j = i__; j <= i__2; ++j) { + a[i__ + j * a_dim1] = work[lwt + *n * (j - 1) + i__] * -1.f; + } + } else { + i__2 = *n - i__ + 1; + scopy_(&i__2, &work[lwt + *n * (i__ - 1) + i__], n, &a[i__ + i__ * + a_dim1], lda); + } + } + + work[1] = (real) lworkopt; + return 0; + +/* End of SGETSQRHRT */ + +} /* sgetsqrhrt_ */ + diff --git a/lapack-netlib/SRC/sggbak.c b/lapack-netlib/SRC/sggbak.c new file mode 100644 index 000000000..380f7ca82 --- /dev/null +++ b/lapack-netlib/SRC/sggbak.c @@ -0,0 +1,720 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGBAK */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGBAK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, */ +/* LDV, INFO ) */ + +/* CHARACTER JOB, SIDE */ +/* INTEGER IHI, ILO, INFO, LDV, M, N */ +/* REAL LSCALE( * ), RSCALE( * ), V( LDV, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGBAK forms the right or left eigenvectors of a real generalized */ +/* > eigenvalue problem A*x = lambda*B*x, by backward transformation on */ +/* > the computed eigenvectors of the balanced pair of matrices output by */ +/* > SGGBAL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the type of backward transformation required: */ +/* > = 'N': do nothing, return immediately; */ +/* > = 'P': do backward transformation for permutation only; */ +/* > = 'S': do backward transformation for scaling only; */ +/* > = 'B': do backward transformations for both permutation and */ +/* > scaling. */ +/* > JOB must be the same as the argument JOB supplied to SGGBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'R': V contains right eigenvectors; */ +/* > = 'L': V contains left eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrix V. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > The integers ILO and IHI determined by SGGBAL. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LSCALE */ +/* > \verbatim */ +/* > LSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and/or scaling factors applied */ +/* > to the left side of A and B, as returned by SGGBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RSCALE */ +/* > \verbatim */ +/* > RSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and/or scaling factors applied */ +/* > to the right side of A and B, as returned by SGGBAL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix V. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,M) */ +/* > On entry, the matrix of right or left eigenvectors to be */ +/* > transformed, as returned by STGEVC. */ +/* > On exit, V is overwritten by the transformed eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the matrix V. LDV >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGBcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > See R.C. Ward, Balancing the generalized eigenvalue problem, */ +/* > SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggbak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *lscale, real *rscale, integer *m, real *v, + integer *ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + integer i__, k; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + logical leftv; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), xerbla_(char *, integer *, ftnlen); + logical rightv; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + --lscale; + --rscale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*n == 0 && *ihi == 0 && *ilo != 1) { + *info = -4; + } else if (*n > 0 && (*ihi < *ilo || *ihi > f2cmax(1,*n))) { + *info = -5; + } else if (*n == 0 && *ilo == 1 && *ihi != 0) { + *info = -5; + } else if (*m < 0) { + *info = -8; + } else if (*ldv < f2cmax(1,*n)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGBAK", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + +/* Backward transformation on right eigenvectors */ + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + sscal_(m, &rscale[i__], &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + +/* Backward transformation on left eigenvectors */ + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + sscal_(m, &lscale[i__], &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + } + +/* Backward permutation */ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + +/* Backward permutation on right eigenvectors */ + + if (rightv) { + if (*ilo == 1) { + goto L50; + } + + for (i__ = *ilo - 1; i__ >= 1; --i__) { + k = rscale[i__]; + if (k == i__) { + goto L40; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + +L50: + if (*ihi == *n) { + goto L70; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + k = rscale[i__]; + if (k == i__) { + goto L60; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L60: + ; + } + } + +/* Backward permutation on left eigenvectors */ + +L70: + if (leftv) { + if (*ilo == 1) { + goto L90; + } + for (i__ = *ilo - 1; i__ >= 1; --i__) { + k = lscale[i__]; + if (k == i__) { + goto L80; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L80: + ; + } + +L90: + if (*ihi == *n) { + goto L110; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + k = lscale[i__]; + if (k == i__) { + goto L100; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L100: + ; + } + } + } + +L110: + + return 0; + +/* End of SGGBAK */ + +} /* sggbak_ */ + diff --git a/lapack-netlib/SRC/sggbal.c b/lapack-netlib/SRC/sggbal.c new file mode 100644 index 000000000..0ba59666b --- /dev/null +++ b/lapack-netlib/SRC/sggbal.c @@ -0,0 +1,1069 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGBAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGBAL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, */ +/* RSCALE, WORK, INFO ) */ + +/* CHARACTER JOB */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, N */ +/* REAL A( LDA, * ), B( LDB, * ), LSCALE( * ), */ +/* $ RSCALE( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGBAL balances a pair of general real matrices (A,B). This */ +/* > involves, first, permuting A and B by similarity transformations to */ +/* > isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N */ +/* > elements on the diagonal; and second, applying a diagonal similarity */ +/* > transformation to rows and columns ILO to IHI to make the rows */ +/* > and columns as close in norm as possible. Both steps are optional. */ +/* > */ +/* > Balancing may reduce the 1-norm of the matrices, and improve the */ +/* > accuracy of the computed eigenvalues and/or eigenvectors in the */ +/* > generalized eigenvalue problem A*x = lambda*B*x. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > Specifies the operations to be performed on A and B: */ +/* > = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 */ +/* > and RSCALE(I) = 1.0 for i = 1,...,N. */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the input matrix A. */ +/* > On exit, A is overwritten by the balanced matrix. */ +/* > If JOB = 'N', A is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,N) */ +/* > On entry, the input matrix B. */ +/* > On exit, B is overwritten by the balanced matrix. */ +/* > If JOB = 'N', B is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are set to integers such that on exit */ +/* > A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* > j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* > If JOB = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] LSCALE */ +/* > \verbatim */ +/* > LSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the left side of A and B. If P(j) is the index of the */ +/* > row interchanged with row j, and D(j) */ +/* > is the scaling factor applied to row j, then */ +/* > LSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* > = D(j) for J = ILO,...,IHI */ +/* > = P(j) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RSCALE */ +/* > \verbatim */ +/* > RSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the right side of A and B. If P(j) is the index of the */ +/* > column interchanged with column j, and D(j) */ +/* > is the scaling factor applied to column j, then */ +/* > LSCALE(j) = P(j) for J = 1,...,ILO-1 */ +/* > = D(j) for J = ILO,...,IHI */ +/* > = P(j) for J = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (lwork) */ +/* > lwork must be at least f2cmax(1,6*N) when JOB = 'S' or 'B', and */ +/* > at least 1 when JOB = 'N' or 'P'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGBcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > See R.C. WARD, Balancing the generalized eigenvalue problem, */ +/* > SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggbal_(char *job, integer *n, real *a, integer *lda, + real *b, integer *ldb, integer *ilo, integer *ihi, real *lscale, real + *rscale, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + real r__1, r__2, r__3; + + /* Local variables */ + integer lcab; + real beta, coef; + integer irab, lrab; + real basl, cmax; + extern real sdot_(integer *, real *, integer *, real *, integer *); + real coef2, coef5; + integer i__, j, k, l, m; + real gamma, t, alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + real sfmin, sfmax; + integer iflow; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *); + integer kount; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *); + integer jc; + real ta, tb, tc; + integer ir, it; + real ew; + integer nr; + real pgamma; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer isamax_(integer *, real *, integer *); + integer lsfmin, lsfmax, ip1, jp1, lm1; + real cab, rab, ewc, cor, sum; + integer nrp2, icab; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --lscale; + --rscale; + --work; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGBAL", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *ilo = 1; + *ihi = *n; + return 0; + } + + if (*n == 1) { + *ilo = 1; + *ihi = *n; + lscale[1] = 1.f; + rscale[1] = 1.f; + return 0; + } + + if (lsame_(job, "N")) { + *ilo = 1; + *ihi = *n; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + lscale[i__] = 1.f; + rscale[i__] = 1.f; +/* L10: */ + } + return 0; + } + + k = 1; + l = *n; + if (lsame_(job, "S")) { + goto L190; + } + + goto L30; + +/* Permute the matrices A and B to isolate the eigenvalues. */ + +/* Find row with one nonzero in columns 1 through L */ + +L20: + l = lm1; + if (l != 1) { + goto L30; + } + + rscale[1] = 1.f; + lscale[1] = 1.f; + goto L190; + +L30: + lm1 = l - 1; + for (i__ = l; i__ >= 1; --i__) { + i__1 = lm1; + for (j = 1; j <= i__1; ++j) { + jp1 = j + 1; + if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) { + goto L50; + } +/* L40: */ + } + j = l; + goto L70; + +L50: + i__1 = l; + for (j = jp1; j <= i__1; ++j) { + if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) { + goto L80; + } +/* L60: */ + } + j = jp1 - 1; + +L70: + m = l; + iflow = 1; + goto L160; +L80: + ; + } + goto L100; + +/* Find column with one nonzero in rows K through N */ + +L90: + ++k; + +L100: + i__1 = l; + for (j = k; j <= i__1; ++j) { + i__2 = lm1; + for (i__ = k; i__ <= i__2; ++i__) { + ip1 = i__ + 1; + if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) { + goto L120; + } +/* L110: */ + } + i__ = l; + goto L140; +L120: + i__2 = l; + for (i__ = ip1; i__ <= i__2; ++i__) { + if (a[i__ + j * a_dim1] != 0.f || b[i__ + j * b_dim1] != 0.f) { + goto L150; + } +/* L130: */ + } + i__ = ip1 - 1; +L140: + m = k; + iflow = 2; + goto L160; +L150: + ; + } + goto L190; + +/* Permute rows M and I */ + +L160: + lscale[m] = (real) i__; + if (i__ == m) { + goto L170; + } + i__1 = *n - k + 1; + sswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda); + i__1 = *n - k + 1; + sswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb); + +/* Permute columns M and J */ + +L170: + rscale[m] = (real) j; + if (j == m) { + goto L180; + } + sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + sswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1); + +L180: + switch (iflow) { + case 1: goto L20; + case 2: goto L90; + } + +L190: + *ilo = k; + *ihi = l; + + if (lsame_(job, "P")) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + lscale[i__] = 1.f; + rscale[i__] = 1.f; +/* L195: */ + } + return 0; + } + + if (*ilo == *ihi) { + return 0; + } + +/* Balance the submatrix in rows ILO to IHI. */ + + nr = *ihi - *ilo + 1; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + rscale[i__] = 0.f; + lscale[i__] = 0.f; + + work[i__] = 0.f; + work[i__ + *n] = 0.f; + work[i__ + (*n << 1)] = 0.f; + work[i__ + *n * 3] = 0.f; + work[i__ + (*n << 2)] = 0.f; + work[i__ + *n * 5] = 0.f; +/* L200: */ + } + +/* Compute right side vector in resulting linear equations */ + + basl = r_lg10(&c_b35); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + tb = b[i__ + j * b_dim1]; + ta = a[i__ + j * a_dim1]; + if (ta == 0.f) { + goto L210; + } + r__1 = abs(ta); + ta = r_lg10(&r__1) / basl; +L210: + if (tb == 0.f) { + goto L220; + } + r__1 = abs(tb); + tb = r_lg10(&r__1) / basl; +L220: + work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb; + work[j + *n * 5] = work[j + *n * 5] - ta - tb; +/* L230: */ + } +/* L240: */ + } + + coef = 1.f / (real) (nr << 1); + coef2 = coef * coef; + coef5 = coef2 * .5f; + nrp2 = nr + 2; + beta = 0.f; + it = 1; + +/* Start generalized conjugate gradient iteration */ + +L250: + + gamma = sdot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)] + , &c__1) + sdot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + * + n * 5], &c__1); + + ew = 0.f; + ewc = 0.f; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + ew += work[i__ + (*n << 2)]; + ewc += work[i__ + *n * 5]; +/* L260: */ + } + +/* Computing 2nd power */ + r__1 = ew; +/* Computing 2nd power */ + r__2 = ewc; +/* Computing 2nd power */ + r__3 = ew - ewc; + gamma = coef * gamma - coef2 * (r__1 * r__1 + r__2 * r__2) - coef5 * ( + r__3 * r__3); + if (gamma == 0.f) { + goto L350; + } + if (it != 1) { + beta = gamma / pgamma; + } + t = coef5 * (ewc - ew * 3.f); + tc = coef5 * (ew - ewc * 3.f); + + sscal_(&nr, &beta, &work[*ilo], &c__1); + sscal_(&nr, &beta, &work[*ilo + *n], &c__1); + + saxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], & + c__1); + saxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1); + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + work[i__] += tc; + work[i__ + *n] += t; +/* L270: */ + } + +/* Apply matrix to vector */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + kount = 0; + sum = 0.f; + i__2 = *ihi; + for (j = *ilo; j <= i__2; ++j) { + if (a[i__ + j * a_dim1] == 0.f) { + goto L280; + } + ++kount; + sum += work[j]; +L280: + if (b[i__ + j * b_dim1] == 0.f) { + goto L290; + } + ++kount; + sum += work[j]; +L290: + ; + } + work[i__ + (*n << 1)] = (real) kount * work[i__ + *n] + sum; +/* L300: */ + } + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + kount = 0; + sum = 0.f; + i__2 = *ihi; + for (i__ = *ilo; i__ <= i__2; ++i__) { + if (a[i__ + j * a_dim1] == 0.f) { + goto L310; + } + ++kount; + sum += work[i__ + *n]; +L310: + if (b[i__ + j * b_dim1] == 0.f) { + goto L320; + } + ++kount; + sum += work[i__ + *n]; +L320: + ; + } + work[j + *n * 3] = (real) kount * work[j] + sum; +/* L330: */ + } + + sum = sdot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) + + sdot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1); + alpha = gamma / sum; + +/* Determine correction to current iteration */ + + cmax = 0.f; + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + cor = alpha * work[i__ + *n]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + lscale[i__] += cor; + cor = alpha * work[i__]; + if (abs(cor) > cmax) { + cmax = abs(cor); + } + rscale[i__] += cor; +/* L340: */ + } + if (cmax < .5f) { + goto L350; + } + + r__1 = -alpha; + saxpy_(&nr, &r__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)] + , &c__1); + r__1 = -alpha; + saxpy_(&nr, &r__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], & + c__1); + + pgamma = gamma; + ++it; + if (it <= nrp2) { + goto L250; + } + +/* End generalized conjugate gradient iteration */ + +L350: + sfmin = slamch_("S"); + sfmax = 1.f / sfmin; + lsfmin = (integer) (r_lg10(&sfmin) / basl + 1.f); + lsfmax = (integer) (r_lg10(&sfmax) / basl); + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + irab = isamax_(&i__2, &a[i__ + *ilo * a_dim1], lda); + rab = (r__1 = a[i__ + (irab + *ilo - 1) * a_dim1], abs(r__1)); + i__2 = *n - *ilo + 1; + irab = isamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb); +/* Computing MAX */ + r__2 = rab, r__3 = (r__1 = b[i__ + (irab + *ilo - 1) * b_dim1], abs( + r__1)); + rab = f2cmax(r__2,r__3); + r__1 = rab + sfmin; + lrab = (integer) (r_lg10(&r__1) / basl + 1.f); + ir = lscale[i__] + r_sign(&c_b71, &lscale[i__]); +/* Computing MIN */ + i__2 = f2cmax(ir,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lrab; + ir = f2cmin(i__2,i__3); + lscale[i__] = pow_ri(&c_b35, &ir); + icab = isamax_(ihi, &a[i__ * a_dim1 + 1], &c__1); + cab = (r__1 = a[icab + i__ * a_dim1], abs(r__1)); + icab = isamax_(ihi, &b[i__ * b_dim1 + 1], &c__1); +/* Computing MAX */ + r__2 = cab, r__3 = (r__1 = b[icab + i__ * b_dim1], abs(r__1)); + cab = f2cmax(r__2,r__3); + r__1 = cab + sfmin; + lcab = (integer) (r_lg10(&r__1) / basl + 1.f); + jc = rscale[i__] + r_sign(&c_b71, &rscale[i__]); +/* Computing MIN */ + i__2 = f2cmax(jc,lsfmin), i__2 = f2cmin(i__2,lsfmax), i__3 = lsfmax - lcab; + jc = f2cmin(i__2,i__3); + rscale[i__] = pow_ri(&c_b35, &jc); +/* L360: */ + } + +/* Row scaling of matrices A and B */ + + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *n - *ilo + 1; + sscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda); + i__2 = *n - *ilo + 1; + sscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb); +/* L370: */ + } + +/* Column scaling of matrices A and B */ + + i__1 = *ihi; + for (j = *ilo; j <= i__1; ++j) { + sscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1); + sscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1); +/* L380: */ + } + + return 0; + +/* End of SGGBAL */ + +} /* sggbal_ */ + diff --git a/lapack-netlib/SRC/sgges.c b/lapack-netlib/SRC/sgges.c new file mode 100644 index 000000000..1cec32bea --- /dev/null +++ b/lapack-netlib/SRC/sgges.c @@ -0,0 +1,1162 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors f +or GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGES + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, */ +/* SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, */ +/* LDVSR, WORK, LWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */ +/* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), */ +/* $ VSR( LDVSR, * ), WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), */ +/* > the generalized eigenvalues, the generalized real Schur form (S,T), */ +/* > optionally, the left and/or right matrices of Schur vectors (VSL and */ +/* > VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > quasi-triangular matrix S and the upper triangular matrix T.The */ +/* > leading columns of VSL and VSR then form an orthonormal basis for the */ +/* > corresponding left and right eigenspaces (deflating subspaces). */ +/* > */ +/* > (If only the generalized eigenvalues are needed, use the driver */ +/* > SGGEV instead, which is faster.) */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0 or both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized real Schur form if T is */ +/* > upper triangular with non-negative diagonal and S is block upper */ +/* > triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond */ +/* > to real generalized eigenvalues, while 2-by-2 blocks of S will be */ +/* > "standardized" by making the corresponding elements of T have the */ +/* > form: */ +/* > [ a 0 ] */ +/* > [ 0 b ] */ +/* > */ +/* > and the pair of corresponding 2-by-2 blocks in S and T will have a */ +/* > complex conjugate pair of generalized eigenvalues. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG); */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of three REAL arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */ +/* > SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */ +/* > one of a complex conjugate pair of eigenvalues is selected, */ +/* > then both complex eigenvalues are selected. */ +/* > */ +/* > Note that in the ill-conditioned case, a selected complex */ +/* > eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), */ +/* > BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 */ +/* > in this case. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. (Complex conjugate pairs for which */ +/* > SELCTG is true for either eigenvalue count as 2.) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAR */ +/* > \verbatim */ +/* > ALPHAR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAI */ +/* > \verbatim */ +/* > ALPHAI is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* > be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, */ +/* > and BETA(j),j=1,...,N are the diagonals of the complex Schur */ +/* > form (S,T) that would result if the 2-by-2 diagonal blocks of */ +/* > the real Schur form of (A,B) were further reduced to */ +/* > triangular form using 2-by-2 complex unitary transformations. */ +/* > If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* > positive, then the j-th and (j+1)-st eigenvalues are a */ +/* > complex conjugate pair, with ALPHAI(j+1) negative. */ +/* > */ +/* > Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* > may easily over- or underflow, and BETA(j) may even be zero. */ +/* > Thus, the user should avoid naively computing the ratio. */ +/* > However, ALPHAR and ALPHAI will be always less than and */ +/* > usually comparable with norm(A) in magnitude, and BETA always */ +/* > less than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is REAL array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >=1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is REAL array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N = 0, LWORK >= 1, else LWORK >= f2cmax(8*N,6*N+16). */ +/* > For good performance , LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ +/* > be correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in SHGEQZ. */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in STGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sgges_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, + integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, + integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork, + logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + real r__1; + + /* Local variables */ + real anrm, bnrm; + integer idum[1], ierr, itau, iwrk; + real pvsl, pvsr; + integer i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irows; + logical lst2sl; + extern /* Subroutine */ int slabad_(real *, real *); + integer ip; + extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, integer * + ), sggbal_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, real *, real *, + integer *); + logical ilascl, ilbscl; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + real safmin; + extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *, integer *); + real safmax; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ijobvl, iright; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); + real anrmto, bnrmto; + logical lastsl; + extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , real *, real *, real *, integer *, real *, integer *, real *, + integer *, integer *), stgsen_(integer *, + logical *, logical *, logical *, integer *, real *, integer *, + real *, integer *, real *, real *, real *, real *, integer *, + real *, integer *, integer *, real *, real *, real *, real *, + integer *, integer *, integer *, integer *); + integer minwrk, maxwrk; + real smlnum; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + logical wantst, lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + real dif[2]; + integer ihi, ilo; + real eps; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --work; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -15; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -17; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + if (*n > 0) { +/* Computing MAX */ + i__1 = *n << 3, i__2 = *n * 6 + 16; + minwrk = f2cmax(i__1,i__2); + maxwrk = minwrk - *n + *n * ilaenv_(&c__1, "SGEQRF", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SORMQR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SOR" + "GQR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + } + } else { + minwrk = 1; + maxwrk = 1; + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -19; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGES ", &i__1, (ftnlen)5); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + safmin = slamch_("S"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + smlnum = sqrt(safmin) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Workspace: need 6*N + 2*N space for storing balancing factors) */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + slaset_("Full", n, n, &c_b38, &c_b39, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + sorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + slaset_("Full", n, n, &c_b38, &c_b39, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + sgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] + , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L40; + } + +/* Sort eigenvalues ALPHA/BETA if desired */ +/* (Workspace: need 4*N+16 ) */ + + *sdim = 0; + if (wantst) { + +/* Undo scaling on eigenvalues before SELCTGing */ + + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], + n, &ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], + n, &ierr); + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); +/* L10: */ + } + + i__1 = *lwork - iwrk + 1; + stgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ + vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, & + pvsr, dif, &work[iwrk], &i__1, idum, &c__1, &ierr); + if (ierr == 1) { + *info = *n + 3; + } + + } + +/* Apply back-permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ + vsl_offset], ldvsl, &ierr); + } + + if (ilvsr) { + sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ + vsr_offset], ldvsr, &ierr); + } + +/* Check if unscaling would cause over/underflow, if so, rescale */ +/* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */ +/* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */ + + if (ilascl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.f) { + if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ + i__] > anrm / anrmto) { + work[1] = (r__1 = a[i__ + i__ * a_dim1] / alphar[i__], + abs(r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } else if (alphai[i__] / safmax > anrmto / anrm || safmin / + alphai[i__] > anrm / anrmto) { + work[1] = (r__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[ + i__], abs(r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L50: */ + } + } + + if (ilbscl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.f) { + if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] + > bnrm / bnrmto) { + work[1] = (r__1 = b[i__ + i__ * b_dim1] / beta[i__], abs( + r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L60: */ + } + } + +/* Undo scaling */ + + if (ilascl) { + slascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + slascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); + if (alphai[i__] == 0.f) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L30: */ + } + + } + +L40: + + work[1] = (real) maxwrk; + + return 0; + +/* End of SGGES */ + +} /* sgges_ */ + diff --git a/lapack-netlib/SRC/sgges3.c b/lapack-netlib/SRC/sgges3.c new file mode 100644 index 000000000..518de890a --- /dev/null +++ b/lapack-netlib/SRC/sgges3.c @@ -0,0 +1,1163 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices (blocked algorithm) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGES3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, */ +/* $ LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, */ +/* $ VSR, LDVSR, WORK, LWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM */ +/* LOGICAL BWORK( * ) */ +/* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */ +/* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), */ +/* $ VSR( LDVSR, * ), WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGES3 computes for a pair of N-by-N real nonsymmetric matrices (A,B), */ +/* > the generalized eigenvalues, the generalized real Schur form (S,T), */ +/* > optionally, the left and/or right matrices of Schur vectors (VSL and */ +/* > VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > quasi-triangular matrix S and the upper triangular matrix T.The */ +/* > leading columns of VSL and VSR then form an orthonormal basis for the */ +/* > corresponding left and right eigenspaces (deflating subspaces). */ +/* > */ +/* > (If only the generalized eigenvalues are needed, use the driver */ +/* > SGGEV instead, which is faster.) */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0 or both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized real Schur form if T is */ +/* > upper triangular with non-negative diagonal and S is block upper */ +/* > triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond */ +/* > to real generalized eigenvalues, while 2-by-2 blocks of S will be */ +/* > "standardized" by making the corresponding elements of T have the */ +/* > form: */ +/* > [ a 0 ] */ +/* > [ 0 b ] */ +/* > */ +/* > and the pair of corresponding 2-by-2 blocks in S and T will have a */ +/* > complex conjugate pair of generalized eigenvalues. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG); */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of three REAL arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */ +/* > SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */ +/* > one of a complex conjugate pair of eigenvalues is selected, */ +/* > then both complex eigenvalues are selected. */ +/* > */ +/* > Note that in the ill-conditioned case, a selected complex */ +/* > eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), */ +/* > BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 */ +/* > in this case. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. (Complex conjugate pairs for which */ +/* > SELCTG is true for either eigenvalue count as 2.) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAR */ +/* > \verbatim */ +/* > ALPHAR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAI */ +/* > \verbatim */ +/* > ALPHAI is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* > be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, */ +/* > and BETA(j),j=1,...,N are the diagonals of the complex Schur */ +/* > form (S,T) that would result if the 2-by-2 diagonal blocks of */ +/* > the real Schur form of (A,B) were further reduced to */ +/* > triangular form using 2-by-2 complex unitary transformations. */ +/* > If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* > positive, then the j-th and (j+1)-st eigenvalues are a */ +/* > complex conjugate pair, with ALPHAI(j+1) negative. */ +/* > */ +/* > Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* > may easily over- or underflow, and BETA(j) may even be zero. */ +/* > Thus, the user should avoid naively computing the ratio. */ +/* > However, ALPHAR and ALPHAI will be always less than and */ +/* > usually comparable with norm(A) in magnitude, and BETA always */ +/* > less than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is REAL array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >=1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is REAL array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ +/* > be correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in SHGEQZ. */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in STGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sgges3_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, integer *n, real *a, integer *lda, real *b, integer *ldb, + integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, + integer *ldvsl, real *vsr, integer *ldvsr, real *work, integer *lwork, + logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + real r__1; + + /* Local variables */ + real anrm, bnrm; + integer idum[1], ierr, itau, iwrk; + real pvsl, pvsr; + integer i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irows; + extern /* Subroutine */ int sgghd3_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *, integer *) + ; + logical lst2sl; + extern /* Subroutine */ int slabad_(real *, real *); + integer ip; + extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, integer * + ), sggbal_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, real *, real *, + integer *); + logical ilascl, ilbscl; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real safmax, bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + integer ijobvl, iright; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); + real anrmto, bnrmto; + logical lastsl; + extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , real *, real *, real *, integer *, real *, integer *, real *, + integer *, integer *), stgsen_(integer *, + logical *, logical *, logical *, integer *, real *, integer *, + real *, integer *, real *, real *, real *, real *, integer *, + real *, integer *, integer *, real *, real *, real *, real *, + integer *, integer *, integer *, integer *); + real smlnum; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + logical wantst, lquery; + integer lwkopt; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + real dif[2]; + integer ihi, ilo; + real eps; + + +/* -- LAPACK driver routine (version 3.6.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --work; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -15; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -17; + } else if (*lwork < *n * 6 + 16 && ! lquery) { + *info = -19; + } + +/* Compute workspace */ + + if (*info == 0) { + sgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = *n * 6 + 16, i__2 = *n * 3 + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + sormqr_("L", "T", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], + lda, &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + if (ilvsl) { + sorgqr_(n, n, n, &vsl[vsl_offset], ldvsl, &work[1], &work[1], & + c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + } + sgghd3_(jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[ + 1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + shgeqz_("S", jobvsl, jobvsr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ + vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[1], &c_n1, + &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + if (wantst) { + stgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, & + b[b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ + vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, + &pvsr, dif, &work[1], &c_n1, idum, &c__1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + } + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGES3 ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + safmin = slamch_("S"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + smlnum = sqrt(safmin) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ + + i__1 = *lwork + 1 - iwrk; + sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ + + if (ilvsl) { + slaset_("Full", n, n, &c_b36, &c_b37, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + sorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + slaset_("Full", n, n, &c_b36, &c_b37, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ + + i__1 = *lwork + 1 - iwrk; + sgghd3_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk] + , &i__1, &ierr); + +/* Perform QZ algorithm, computing Schur vectors if desired */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] + , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L40; + } + +/* Sort eigenvalues ALPHA/BETA if desired */ + + *sdim = 0; + if (wantst) { + +/* Undo scaling on eigenvalues before SELCTGing */ + + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], + n, &ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], + n, &ierr); + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); +/* L10: */ + } + + i__1 = *lwork - iwrk + 1; + stgsen_(&c__0, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ + vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pvsl, & + pvsr, dif, &work[iwrk], &i__1, idum, &c__1, &ierr); + if (ierr == 1) { + *info = *n + 3; + } + + } + +/* Apply back-permutation to VSL and VSR */ + + if (ilvsl) { + sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ + vsl_offset], ldvsl, &ierr); + } + + if (ilvsr) { + sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ + vsr_offset], ldvsr, &ierr); + } + +/* Check if unscaling would cause over/underflow, if so, rescale */ +/* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */ +/* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */ + + if (ilascl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.f) { + if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ + i__] > anrm / anrmto) { + work[1] = (r__1 = a[i__ + i__ * a_dim1] / alphar[i__], + abs(r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } else if (alphai[i__] / safmax > anrmto / anrm || safmin / + alphai[i__] > anrm / anrmto) { + work[1] = (r__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[ + i__], abs(r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L50: */ + } + } + + if (ilbscl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.f) { + if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] + > bnrm / bnrmto) { + work[1] = (r__1 = b[i__ + i__ * b_dim1] / beta[i__], abs( + r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L60: */ + } + } + +/* Undo scaling */ + + if (ilascl) { + slascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + slascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); + if (alphai[i__] == 0.f) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L30: */ + } + + } + +L40: + + work[1] = (real) lwkopt; + + return 0; + +/* End of SGGES3 */ + +} /* sgges3_ */ + diff --git a/lapack-netlib/SRC/sggesx.c b/lapack-netlib/SRC/sggesx.c new file mode 100644 index 000000000..3703b9e44 --- /dev/null +++ b/lapack-netlib/SRC/sggesx.c @@ -0,0 +1,1308 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors +for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGESX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, */ +/* B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, */ +/* VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, */ +/* LIWORK, BWORK, INFO ) */ + +/* CHARACTER JOBVSL, JOBVSR, SENSE, SORT */ +/* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, */ +/* $ SDIM */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */ +/* $ B( LDB, * ), BETA( * ), RCONDE( 2 ), */ +/* $ RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), */ +/* $ WORK( * ) */ +/* LOGICAL SELCTG */ +/* EXTERNAL SELCTG */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGESX computes for a pair of N-by-N real nonsymmetric matrices */ +/* > (A,B), the generalized eigenvalues, the real Schur form (S,T), and, */ +/* > optionally, the left and/or right matrices of Schur vectors (VSL and */ +/* > VSR). This gives the generalized Schur factorization */ +/* > */ +/* > (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) */ +/* > */ +/* > Optionally, it also orders the eigenvalues so that a selected cluster */ +/* > of eigenvalues appears in the leading diagonal blocks of the upper */ +/* > quasi-triangular matrix S and the upper triangular matrix T; computes */ +/* > a reciprocal condition number for the average of the selected */ +/* > eigenvalues (RCONDE); and computes a reciprocal condition number for */ +/* > the right and left deflating subspaces corresponding to the selected */ +/* > eigenvalues (RCONDV). The leading columns of VSL and VSR then form */ +/* > an orthonormal basis for the corresponding left and right eigenspaces */ +/* > (deflating subspaces). */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar w */ +/* > or a ratio alpha/beta = w, such that A - w*B is singular. It is */ +/* > usually represented as the pair (alpha,beta), as there is a */ +/* > reasonable interpretation for beta=0 or for both being zero. */ +/* > */ +/* > A pair of matrices (S,T) is in generalized real Schur form if T is */ +/* > upper triangular with non-negative diagonal and S is block upper */ +/* > triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond */ +/* > to real generalized eigenvalues, while 2-by-2 blocks of S will be */ +/* > "standardized" by making the corresponding elements of T have the */ +/* > form: */ +/* > [ a 0 ] */ +/* > [ 0 b ] */ +/* > */ +/* > and the pair of corresponding 2-by-2 blocks in S and T will have a */ +/* > complex conjugate pair of generalized eigenvalues. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVSL */ +/* > \verbatim */ +/* > JOBVSL is CHARACTER*1 */ +/* > = 'N': do not compute the left Schur vectors; */ +/* > = 'V': compute the left Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVSR */ +/* > \verbatim */ +/* > JOBVSR is CHARACTER*1 */ +/* > = 'N': do not compute the right Schur vectors; */ +/* > = 'V': compute the right Schur vectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SORT */ +/* > \verbatim */ +/* > SORT is CHARACTER*1 */ +/* > Specifies whether or not to order the eigenvalues on the */ +/* > diagonal of the generalized Schur form. */ +/* > = 'N': Eigenvalues are not ordered; */ +/* > = 'S': Eigenvalues are ordered (see SELCTG). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SELCTG */ +/* > \verbatim */ +/* > SELCTG is a LOGICAL FUNCTION of three REAL arguments */ +/* > SELCTG must be declared EXTERNAL in the calling subroutine. */ +/* > If SORT = 'N', SELCTG is not referenced. */ +/* > If SORT = 'S', SELCTG is used to select eigenvalues to sort */ +/* > to the top left of the Schur form. */ +/* > An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if */ +/* > SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either */ +/* > one of a complex conjugate pair of eigenvalues is selected, */ +/* > then both complex eigenvalues are selected. */ +/* > Note that a selected complex eigenvalue may no longer satisfy */ +/* > SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, */ +/* > since ordering may change the value of complex eigenvalues */ +/* > (especially if the eigenvalue is ill-conditioned), in this */ +/* > case INFO is set to N+3. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': None are computed; */ +/* > = 'E': Computed for average of selected eigenvalues only; */ +/* > = 'V': Computed for selected deflating subspaces only; */ +/* > = 'B': Computed for both. */ +/* > If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VSL, and VSR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the first of the pair of matrices. */ +/* > On exit, A has been overwritten by its generalized Schur */ +/* > form S. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the second of the pair of matrices. */ +/* > On exit, B has been overwritten by its generalized Schur */ +/* > form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SDIM */ +/* > \verbatim */ +/* > SDIM is INTEGER */ +/* > If SORT = 'N', SDIM = 0. */ +/* > If SORT = 'S', SDIM = number of eigenvalues (after sorting) */ +/* > for which SELCTG is true. (Complex conjugate pairs for which */ +/* > SELCTG is true for either eigenvalue count as 2.) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAR */ +/* > \verbatim */ +/* > ALPHAR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAI */ +/* > \verbatim */ +/* > ALPHAI is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* > be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i */ +/* > and BETA(j),j=1,...,N are the diagonals of the complex Schur */ +/* > form (S,T) that would result if the 2-by-2 diagonal blocks of */ +/* > the real Schur form of (A,B) were further reduced to */ +/* > triangular form using 2-by-2 complex unitary transformations. */ +/* > If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* > positive, then the j-th and (j+1)-st eigenvalues are a */ +/* > complex conjugate pair, with ALPHAI(j+1) negative. */ +/* > */ +/* > Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* > may easily over- or underflow, and BETA(j) may even be zero. */ +/* > Thus, the user should avoid naively computing the ratio. */ +/* > However, ALPHAR and ALPHAI will be always less than and */ +/* > usually comparable with norm(A) in magnitude, and BETA always */ +/* > less than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSL */ +/* > \verbatim */ +/* > VSL is REAL array, dimension (LDVSL,N) */ +/* > If JOBVSL = 'V', VSL will contain the left Schur vectors. */ +/* > Not referenced if JOBVSL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSL */ +/* > \verbatim */ +/* > LDVSL is INTEGER */ +/* > The leading dimension of the matrix VSL. LDVSL >=1, and */ +/* > if JOBVSL = 'V', LDVSL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VSR */ +/* > \verbatim */ +/* > VSR is REAL array, dimension (LDVSR,N) */ +/* > If JOBVSR = 'V', VSR will contain the right Schur vectors. */ +/* > Not referenced if JOBVSR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVSR */ +/* > \verbatim */ +/* > LDVSR is INTEGER */ +/* > The leading dimension of the matrix VSR. LDVSR >= 1, and */ +/* > if JOBVSR = 'V', LDVSR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is REAL array, dimension ( 2 ) */ +/* > If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the */ +/* > reciprocal condition numbers for the average of the selected */ +/* > eigenvalues. */ +/* > Not referenced if SENSE = 'N' or 'V'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is REAL array, dimension ( 2 ) */ +/* > If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the */ +/* > reciprocal condition numbers for the selected deflating */ +/* > subspaces. */ +/* > Not referenced if SENSE = 'N' or 'E'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', */ +/* > LWORK >= f2cmax( 8*N, 6*N+16, 2*SDIM*(N-SDIM) ), else */ +/* > LWORK >= f2cmax( 8*N, 6*N+16 ). */ +/* > Note that 2*SDIM*(N-SDIM) <= N*N/2. */ +/* > Note also that an error is only returned if */ +/* > LWORK < f2cmax( 8*N, 6*N+16), but if SENSE = 'E' or 'V' or 'B' */ +/* > this may not be large enough. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the bound on the optimal size of the WORK */ +/* > array and the minimum size of the IWORK array, returns these */ +/* > values as the first entries of the WORK and IWORK arrays, and */ +/* > no error message related to LWORK or LIWORK is issued by */ +/* > XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise */ +/* > LIWORK >= N+6. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the bound on the optimal size of the */ +/* > WORK array and the minimum size of the IWORK array, returns */ +/* > these values as the first entries of the WORK and IWORK */ +/* > arrays, and no error message related to LWORK or LIWORK is */ +/* > issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > Not referenced if SORT = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. (A,B) are not in Schur */ +/* > form, but ALPHAR(j), ALPHAI(j), and BETA(j) should */ +/* > be correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in SHGEQZ */ +/* > =N+2: after reordering, roundoff changed values of */ +/* > some complex eigenvalues so that leading */ +/* > eigenvalues in the Generalized Schur form no */ +/* > longer satisfy SELCTG=.TRUE. This could also */ +/* > be caused due to scaling. */ +/* > =N+3: reordering failed in STGSEN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > An approximate (asymptotic) bound on the average absolute error of */ +/* > the selected eigenvalues is */ +/* > */ +/* > EPS * norm((A, B)) / RCONDE( 1 ). */ +/* > */ +/* > An approximate (asymptotic) bound on the maximum angular error in */ +/* > the computed deflating subspaces is */ +/* > */ +/* > EPS * norm((A, B)) / RCONDV( 2 ). */ +/* > */ +/* > See LAPACK User's Guide, section 4.11 for more information. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp + selctg, char *sense, integer *n, real *a, integer *lda, real *b, + integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, + real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *rconde, + real *rcondv, real *work, integer *lwork, integer *iwork, integer * + liwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, + vsr_dim1, vsr_offset, i__1, i__2; + real r__1; + + /* Local variables */ + integer ijob; + real anrm, bnrm; + integer ierr, itau, iwrk, lwrk, i__; + extern logical lsame_(char *, char *); + integer ileft, icols; + logical cursl, ilvsl, ilvsr; + integer irows; + logical lst2sl; + extern /* Subroutine */ int slabad_(real *, real *); + integer ip; + real pl; + extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, integer * + ), sggbal_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, real *, real *, + integer *); + real pr; + logical ilascl, ilbscl; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + real safmin; + extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *, integer *); + real safmax; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ijobvl, iright; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + logical wantsb, wantse, lastsl; + integer liwmin; + real anrmto, bnrmto; + integer minwrk, maxwrk; + logical wantsn; + real smlnum; + extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , real *, real *, real *, integer *, real *, integer *, real *, + integer *, integer *), slaset_(char *, + integer *, integer *, real *, real *, real *, integer *), + sorgqr_(integer *, integer *, integer *, real *, integer *, real * + , real *, integer *, integer *), stgsen_(integer *, logical *, + logical *, logical *, integer *, real *, integer *, real *, + integer *, real *, real *, real *, real *, integer *, real *, + integer *, integer *, real *, real *, real *, real *, integer *, + integer *, integer *, integer *); + logical wantst, lquery, wantsv; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + real dif[2]; + integer ihi, ilo; + real eps; + + +/* -- LAPACK driver routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vsl_dim1 = *ldvsl; + vsl_offset = 1 + vsl_dim1 * 1; + vsl -= vsl_offset; + vsr_dim1 = *ldvsr; + vsr_offset = 1 + vsr_dim1 * 1; + vsr -= vsr_offset; + --rconde; + --rcondv; + --work; + --iwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvsl, "N")) { + ijobvl = 1; + ilvsl = FALSE_; + } else if (lsame_(jobvsl, "V")) { + ijobvl = 2; + ilvsl = TRUE_; + } else { + ijobvl = -1; + ilvsl = FALSE_; + } + + if (lsame_(jobvsr, "N")) { + ijobvr = 1; + ilvsr = FALSE_; + } else if (lsame_(jobvsr, "V")) { + ijobvr = 2; + ilvsr = TRUE_; + } else { + ijobvr = -1; + ilvsr = FALSE_; + } + + wantst = lsame_(sort, "S"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + lquery = *lwork == -1 || *liwork == -1; + if (wantsn) { + ijob = 0; + } else if (wantse) { + ijob = 1; + } else if (wantsv) { + ijob = 2; + } else if (wantsb) { + ijob = 4; + } + +/* Test the input arguments */ + + *info = 0; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (! wantst && ! lsame_(sort, "N")) { + *info = -3; + } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! + wantsn) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*n)) { + *info = -8; + } else if (*ldb < f2cmax(1,*n)) { + *info = -10; + } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { + *info = -16; + } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { + *info = -18; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + if (*n > 0) { +/* Computing MAX */ + i__1 = *n << 3, i__2 = *n * 6 + 16; + minwrk = f2cmax(i__1,i__2); + maxwrk = minwrk - *n + *n * ilaenv_(&c__1, "SGEQRF", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SORMQR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + if (ilvsl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = minwrk - *n + *n * ilaenv_(&c__1, "SOR" + "GQR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + } + lwrk = maxwrk; + if (ijob >= 1) { +/* Computing MAX */ + i__1 = lwrk, i__2 = *n * *n / 2; + lwrk = f2cmax(i__1,i__2); + } + } else { + minwrk = 1; + maxwrk = 1; + lwrk = 1; + } + work[1] = (real) lwrk; + if (wantsn || *n == 0) { + liwmin = 1; + } else { + liwmin = *n + 6; + } + iwork[1] = liwmin; + + if (*lwork < minwrk && ! lquery) { + *info = -22; + } else if (*liwork < liwmin && ! lquery) { + *info = -24; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGESX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + *sdim = 0; + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + safmin = slamch_("S"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + smlnum = sqrt(safmin) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrix to make it more nearly triangular */ +/* (Workspace: need 6*N + 2*N for permutation parameters) */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + icols = *n + 1 - ilo; + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VSL */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvsl) { + slaset_("Full", n, n, &c_b42, &c_b43, &vsl[vsl_offset], ldvsl); + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vsl[ + ilo + 1 + ilo * vsl_dim1], ldvsl); + } + i__1 = *lwork + 1 - iwrk; + sorgqr_(&irows, &irows, &irows, &vsl[ilo + ilo * vsl_dim1], ldvsl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VSR */ + + if (ilvsr) { + slaset_("Full", n, n, &c_b42, &c_b43, &vsr[vsr_offset], ldvsr); + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + sgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); + + *sdim = 0; + +/* Perform QZ algorithm, computing Schur vectors if desired */ +/* (Workspace: need N) */ + + iwrk = itau; + i__1 = *lwork + 1 - iwrk; + shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] + , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L50; + } + +/* Sort eigenvalues ALPHA/BETA and compute the reciprocal of */ +/* condition number(s) */ +/* (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) */ +/* otherwise, need 8*(N+1) ) */ + + if (wantst) { + +/* Undo scaling on eigenvalues before SELCTGing */ + + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], + n, &ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], + n, &ierr); + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, + &ierr); + } + +/* Select eigenvalues */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); +/* L10: */ + } + +/* Reorder eigenvalues, transform Generalized Schur vectors, and */ +/* compute reciprocal condition numbers */ + + i__1 = *lwork - iwrk + 1; + stgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ + vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pl, &pr, + dif, &work[iwrk], &i__1, &iwork[1], liwork, &ierr); + + if (ijob >= 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim); + maxwrk = f2cmax(i__1,i__2); + } + if (ierr == -22) { + +/* not enough real workspace */ + + *info = -22; + } else { + if (ijob == 1 || ijob == 4) { + rconde[1] = pl; + rconde[2] = pr; + } + if (ijob == 2 || ijob == 4) { + rcondv[1] = dif[0]; + rcondv[2] = dif[1]; + } + if (ierr == 1) { + *info = *n + 3; + } + } + + } + +/* Apply permutation to VSL and VSR */ +/* (Workspace: none needed) */ + + if (ilvsl) { + sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ + vsl_offset], ldvsl, &ierr); + } + + if (ilvsr) { + sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ + vsr_offset], ldvsr, &ierr); + } + +/* Check if unscaling would cause over/underflow, if so, rescale */ +/* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of */ +/* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */ + + if (ilascl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.f) { + if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ + i__] > anrm / anrmto) { + work[1] = (r__1 = a[i__ + i__ * a_dim1] / alphar[i__], + abs(r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } else if (alphai[i__] / safmax > anrmto / anrm || safmin / + alphai[i__] > anrm / anrmto) { + work[1] = (r__1 = a[i__ + (i__ + 1) * a_dim1] / alphai[ + i__], abs(r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L20: */ + } + } + + if (ilbscl) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (alphai[i__] != 0.f) { + if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] + > bnrm / bnrmto) { + work[1] = (r__1 = b[i__ + i__ * b_dim1] / beta[i__], abs( + r__1)); + beta[i__] *= work[1]; + alphar[i__] *= work[1]; + alphai[i__] *= work[1]; + } + } +/* L25: */ + } + } + +/* Undo scaling */ + + if (ilascl) { + slascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + slascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & + ierr); + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + if (wantst) { + +/* Check if reordering is correct */ + + lastsl = TRUE_; + lst2sl = TRUE_; + *sdim = 0; + ip = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); + if (alphai[i__] == 0.f) { + if (cursl) { + ++(*sdim); + } + ip = 0; + if (cursl && ! lastsl) { + *info = *n + 2; + } + } else { + if (ip == 1) { + +/* Last eigenvalue of conjugate pair */ + + cursl = cursl || lastsl; + lastsl = cursl; + if (cursl) { + *sdim += 2; + } + ip = -1; + if (cursl && ! lst2sl) { + *info = *n + 2; + } + } else { + +/* First eigenvalue of conjugate pair */ + + ip = 1; + } + } + lst2sl = lastsl; + lastsl = cursl; +/* L40: */ + } + + } + +L50: + + work[1] = (real) maxwrk; + iwork[1] = liwmin; + + return 0; + +/* End of SGGESX */ + +} /* sggesx_ */ + diff --git a/lapack-netlib/SRC/sggev.c b/lapack-netlib/SRC/sggev.c new file mode 100644 index 000000000..39357ba9e --- /dev/null +++ b/lapack-netlib/SRC/sggev.c @@ -0,0 +1,1099 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, */ +/* BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */ +/* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), */ +/* $ VR( LDVR, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) */ +/* > the generalized eigenvalues, and optionally, the left and/or right */ +/* > generalized eigenvectors. */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > */ +/* > A * v(j) = lambda(j) * B * v(j). */ +/* > */ +/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B . */ +/* > */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAR */ +/* > \verbatim */ +/* > ALPHAR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAI */ +/* > \verbatim */ +/* > ALPHAI is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* > be the generalized eigenvalues. If ALPHAI(j) is zero, then */ +/* > the j-th eigenvalue is real; if positive, then the j-th and */ +/* > (j+1)-st eigenvalues are a complex conjugate pair, with */ +/* > ALPHAI(j+1) negative. */ +/* > */ +/* > Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* > may easily over- or underflow, and BETA(j) may even be zero. */ +/* > Thus, the user should avoid naively computing the ratio */ +/* > alpha/beta. However, ALPHAR and ALPHAI will be always less */ +/* > than and usually comparable with norm(A) in magnitude, and */ +/* > BETA always less than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is REAL array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* > after another in the columns of VL, in the same order as */ +/* > their eigenvalues. If the j-th eigenvalue is real, then */ +/* > u(j) = VL(:,j), the j-th column of VL. If the j-th and */ +/* > (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* > u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part)+abs(imag. part)=1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is REAL array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* > after another in the columns of VR, in the same order as */ +/* > their eigenvalues. If the j-th eigenvalue is real, then */ +/* > v(j) = VR(:,j), the j-th column of VR. If the j-th and */ +/* > (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* > v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part)+abs(imag. part)=1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,8*N). */ +/* > For good performance, LWORK must generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ +/* > should be correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in SHGEQZ. */ +/* > =N+2: error return from STGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sggev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + real anrm, bnrm; + integer ierr, itau; + real temp; + logical ilvl, ilvr; + integer iwrk; + extern logical lsame_(char *, char *); + integer ileft, icols, irows, jc; + extern /* Subroutine */ int slabad_(real *, real *); + integer in, jr; + extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, integer * + ), sggbal_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, real *, real *, + integer *); + logical ilascl, ilbscl; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgghrd_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *, + integer *); + logical ldumma[1]; + char chtemp[1]; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ijobvl, iright; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *), stgevc_( + char *, char *, logical *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *, + integer *, real *, integer *); + real anrmto, bnrmto; + extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , real *, real *, real *, integer *, real *, integer *, real *, + integer *, integer *); + integer minwrk, maxwrk; + real smlnum; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + integer ihi, ilo; + real eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -12; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -14; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.) */ + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 3; + minwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = 1, i__2 = *n * (ilaenv_(&c__1, "SGEQRF", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1) + 7); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "SORMQR", " ", n, &c__1, n, + &c__0, (ftnlen)6, (ftnlen)1) + 7); + maxwrk = f2cmax(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * (ilaenv_(&c__1, "SORGQR", " ", n, & + c__1, n, &c_n1, (ftnlen)6, (ftnlen)1) + 7); + maxwrk = f2cmax(i__1,i__2); + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -16; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrices A, B to isolate eigenvalues if possible */ +/* (Workspace: need 6*N) */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB) */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvl) { + slaset_("Full", n, n, &c_b36, &c_b37, &vl[vl_offset], ldvl) + ; + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ + ilo + 1 + ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + sorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VR */ + + if (ilvr) { + slaset_("Full", n, n, &c_b36, &c_b37, &vr[vr_offset], ldvr) + ; + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + if (ilv) { + +/* Eigenvectors requested -- work on whole matrix. */ + + sgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + sgghrd_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur forms and Schur vectors) */ +/* (Workspace: need N) */ + + iwrk = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwrk; + shgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L110; + } + +/* Compute Eigenvectors */ +/* (Workspace: need 6*N) */ + + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + stgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwrk], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L110; + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vl[vl_offset], ldvl, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.f) { + goto L50; + } + temp = 0.f; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__2 = temp, r__3 = (r__1 = vl[jr + jc * vl_dim1], + abs(r__1)); + temp = f2cmax(r__2,r__3); +/* L10: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__3 = temp, r__4 = (r__1 = vl[jr + jc * vl_dim1], + abs(r__1)) + (r__2 = vl[jr + (jc + 1) * + vl_dim1], abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L20: */ + } + } + if (temp < smlnum) { + goto L50; + } + temp = 1.f / temp; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; +/* L30: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; + vl[jr + (jc + 1) * vl_dim1] *= temp; +/* L40: */ + } + } +L50: + ; + } + } + if (ilvr) { + sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vr[vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.f) { + goto L100; + } + temp = 0.f; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__2 = temp, r__3 = (r__1 = vr[jr + jc * vr_dim1], + abs(r__1)); + temp = f2cmax(r__2,r__3); +/* L60: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__3 = temp, r__4 = (r__1 = vr[jr + jc * vr_dim1], + abs(r__1)) + (r__2 = vr[jr + (jc + 1) * + vr_dim1], abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L70: */ + } + } + if (temp < smlnum) { + goto L100; + } + temp = 1.f / temp; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; +/* L80: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; + vr[jr + (jc + 1) * vr_dim1] *= temp; +/* L90: */ + } + } +L100: + ; + } + } + +/* End of eigenvector calculation */ + + } + +/* Undo scaling if necessary */ + +L110: + + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGGEV */ + +} /* sggev_ */ + diff --git a/lapack-netlib/SRC/sggev3.c b/lapack-netlib/SRC/sggev3.c new file mode 100644 index 000000000..956ba4391 --- /dev/null +++ b/lapack-netlib/SRC/sggev3.c @@ -0,0 +1,1111 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices (blocked algorithm) */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGEV3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, */ +/* $ ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, LWORK, */ +/* $ INFO ) */ + +/* CHARACTER JOBVL, JOBVR */ +/* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */ +/* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), */ +/* $ VR( LDVR, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGEV3 computes for a pair of N-by-N real nonsymmetric matrices (A,B) */ +/* > the generalized eigenvalues, and optionally, the left and/or right */ +/* > generalized eigenvectors. */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > */ +/* > A * v(j) = lambda(j) * B * v(j). */ +/* > */ +/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B . */ +/* > */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAR */ +/* > \verbatim */ +/* > ALPHAR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAI */ +/* > \verbatim */ +/* > ALPHAI is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* > be the generalized eigenvalues. If ALPHAI(j) is zero, then */ +/* > the j-th eigenvalue is real; if positive, then the j-th and */ +/* > (j+1)-st eigenvalues are a complex conjugate pair, with */ +/* > ALPHAI(j+1) negative. */ +/* > */ +/* > Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* > may easily over- or underflow, and BETA(j) may even be zero. */ +/* > Thus, the user should avoid naively computing the ratio */ +/* > alpha/beta. However, ALPHAR and ALPHAI will be always less */ +/* > than and usually comparable with norm(A) in magnitude, and */ +/* > BETA always less than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is REAL array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* > after another in the columns of VL, in the same order as */ +/* > their eigenvalues. If the j-th eigenvalue is real, then */ +/* > u(j) = VL(:,j), the j-th column of VL. If the j-th and */ +/* > (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* > u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part)+abs(imag. part)=1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is REAL array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* > after another in the columns of VR, in the same order as */ +/* > their eigenvalues. If the j-th eigenvalue is real, then */ +/* > v(j) = VR(:,j), the j-th column of VR. If the j-th and */ +/* > (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* > v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */ +/* > Each eigenvector is scaled so the largest component has */ +/* > abs(real part)+abs(imag. part)=1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ +/* > should be correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in SHGEQZ. */ +/* > =N+2: error return from STGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup realGEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int sggev3_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real + *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + real anrm, bnrm; + integer ierr, itau; + real temp; + logical ilvl, ilvr; + integer iwrk; + extern logical lsame_(char *, char *); + integer ileft, icols, irows; + extern /* Subroutine */ int sgghd3_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *, integer *) + ; + integer jc; + extern /* Subroutine */ int slabad_(real *, real *); + integer in, jr; + extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, integer * + ), sggbal_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, real *, real *, + integer *); + logical ilascl, ilbscl; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical ldumma[1]; + char chtemp[1]; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + integer ijobvl, iright; + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *), stgevc_( + char *, char *, logical *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *, + integer *, real *, integer *); + real anrmto, bnrmto; + extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , real *, real *, real *, integer *, real *, integer *, real *, + integer *, integer *); + real smlnum; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + integer ihi, ilo; + real eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.6.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (ijobvl <= 0) { + *info = -1; + } else if (ijobvr <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -12; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -14; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 3; + if (*lwork < f2cmax(i__1,i__2) && ! lquery) { + *info = -16; + } + } + +/* Compute workspace */ + + if (*info == 0) { + sgeqrf_(n, n, &b[b_offset], ldb, &work[1], &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = 1, i__2 = *n << 3, i__1 = f2cmax(i__1,i__2), i__2 = *n * 3 + ( + integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + sormqr_("L", "T", n, n, n, &b[b_offset], ldb, &work[1], &a[a_offset], + lda, &work[1], &c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + sgghd3_(jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[1], & + c_n1, &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + if (ilvl) { + sorgqr_(n, n, n, &vl[vl_offset], ldvl, &work[1], &work[1], &c_n1, + &ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = *n * 3 + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + shgeqz_("S", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, &work[1], &c_n1, & + ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + } else { + shgeqz_("E", jobvl, jobvr, n, &c__1, n, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[ + vl_offset], ldvl, &vr[vr_offset], ldvr, &work[1], &c_n1, & + ierr); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (*n << 1) + (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + } + work[1] = (real) lwkopt; + + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGEV3 ", &i__1, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute the matrices A, B to isolate eigenvalues if possible */ + + ileft = 1; + iright = *n + 1; + iwrk = iright + *n; + sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ + ileft], &work[iright], &work[iwrk], &ierr); + +/* Reduce B to triangular form (QR decomposition of B) */ + + irows = ihi + 1 - ilo; + if (ilv) { + icols = *n + 1 - ilo; + } else { + icols = irows; + } + itau = iwrk; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + sgeqrf_(&irows, &icols, &b[ilo + ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to matrix A */ + + i__1 = *lwork + 1 - iwrk; + sormqr_("L", "T", &irows, &icols, &irows, &b[ilo + ilo * b_dim1], ldb, & + work[itau], &a[ilo + ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL */ + + if (ilvl) { + slaset_("Full", n, n, &c_b34, &c_b35, &vl[vl_offset], ldvl) + ; + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + slacpy_("L", &i__1, &i__2, &b[ilo + 1 + ilo * b_dim1], ldb, &vl[ + ilo + 1 + ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + sorgqr_(&irows, &irows, &irows, &vl[ilo + ilo * vl_dim1], ldvl, &work[ + itau], &work[iwrk], &i__1, &ierr); + } + +/* Initialize VR */ + + if (ilvr) { + slaset_("Full", n, n, &c_b34, &c_b35, &vr[vr_offset], ldvr) + ; + } + +/* Reduce to generalized Hessenberg form */ + + if (ilv) { + +/* Eigenvectors requested -- work on whole matrix. */ + + i__1 = *lwork + 1 - iwrk; + sgghd3_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[iwrk], + &i__1, &ierr); + } else { + i__1 = *lwork + 1 - iwrk; + sgghd3_("N", "N", &irows, &c__1, &irows, &a[ilo + ilo * a_dim1], lda, + &b[ilo + ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur forms and Schur vectors) */ + + iwrk = itau; + if (ilv) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + i__1 = *lwork + 1 - iwrk; + shgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ + b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], + ldvl, &vr[vr_offset], ldvr, &work[iwrk], &i__1, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L110; + } + +/* Compute Eigenvectors */ + + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + stgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, + &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ + iwrk], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L110; + } + +/* Undo balancing on VL and VR and normalization */ + + if (ilvl) { + sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vl[vl_offset], ldvl, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.f) { + goto L50; + } + temp = 0.f; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__2 = temp, r__3 = (r__1 = vl[jr + jc * vl_dim1], + abs(r__1)); + temp = f2cmax(r__2,r__3); +/* L10: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__3 = temp, r__4 = (r__1 = vl[jr + jc * vl_dim1], + abs(r__1)) + (r__2 = vl[jr + (jc + 1) * + vl_dim1], abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L20: */ + } + } + if (temp < smlnum) { + goto L50; + } + temp = 1.f / temp; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; +/* L30: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; + vl[jr + (jc + 1) * vl_dim1] *= temp; +/* L40: */ + } + } +L50: + ; + } + } + if (ilvr) { + sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, & + vr[vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.f) { + goto L100; + } + temp = 0.f; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__2 = temp, r__3 = (r__1 = vr[jr + jc * vr_dim1], + abs(r__1)); + temp = f2cmax(r__2,r__3); +/* L60: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__3 = temp, r__4 = (r__1 = vr[jr + jc * vr_dim1], + abs(r__1)) + (r__2 = vr[jr + (jc + 1) * + vr_dim1], abs(r__2)); + temp = f2cmax(r__3,r__4); +/* L70: */ + } + } + if (temp < smlnum) { + goto L100; + } + temp = 1.f / temp; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; +/* L80: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; + vr[jr + (jc + 1) * vr_dim1] *= temp; +/* L90: */ + } + } +L100: + ; + } + } + +/* End of eigenvector calculation */ + + } + +/* Undo scaling if necessary */ + +L110: + + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + work[1] = (real) lwkopt; + return 0; + +/* End of SGGEV3 */ + +} /* sggev3_ */ + diff --git a/lapack-netlib/SRC/sggevx.c b/lapack-netlib/SRC/sggevx.c new file mode 100644 index 000000000..644b09bf6 --- /dev/null +++ b/lapack-netlib/SRC/sggevx.c @@ -0,0 +1,1385 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGEVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, */ +/* ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, */ +/* IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, */ +/* RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) */ + +/* CHARACTER BALANC, JOBVL, JOBVR, SENSE */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N */ +/* REAL ABNRM, BBNRM */ +/* LOGICAL BWORK( * ) */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), */ +/* $ B( LDB, * ), BETA( * ), LSCALE( * ), */ +/* $ RCONDE( * ), RCONDV( * ), RSCALE( * ), */ +/* $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) */ +/* > the generalized eigenvalues, and optionally, the left and/or right */ +/* > generalized eigenvectors. */ +/* > */ +/* > Optionally also, it computes a balancing transformation to improve */ +/* > the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ +/* > LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for */ +/* > the eigenvalues (RCONDE), and reciprocal condition numbers for the */ +/* > right eigenvectors (RCONDV). */ +/* > */ +/* > A generalized eigenvalue for a pair of matrices (A,B) is a scalar */ +/* > lambda or a ratio alpha/beta = lambda, such that A - lambda*B is */ +/* > singular. It is usually represented as the pair (alpha,beta), as */ +/* > there is a reasonable interpretation for beta=0, and even for both */ +/* > being zero. */ +/* > */ +/* > The right eigenvector v(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > */ +/* > A * v(j) = lambda(j) * B * v(j) . */ +/* > */ +/* > The left eigenvector u(j) corresponding to the eigenvalue lambda(j) */ +/* > of (A,B) satisfies */ +/* > */ +/* > u(j)**H * A = lambda(j) * u(j)**H * B. */ +/* > */ +/* > where u(j)**H is the conjugate-transpose of u(j). */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] BALANC */ +/* > \verbatim */ +/* > BALANC is CHARACTER*1 */ +/* > Specifies the balance option to be performed. */ +/* > = 'N': do not diagonally scale or permute; */ +/* > = 'P': permute only; */ +/* > = 'S': scale only; */ +/* > = 'B': both permute and scale. */ +/* > Computed reciprocal condition numbers will be for the */ +/* > matrices after permuting and/or balancing. Permuting does */ +/* > not change condition numbers (in exact arithmetic), but */ +/* > balancing does. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVL */ +/* > \verbatim */ +/* > JOBVL is CHARACTER*1 */ +/* > = 'N': do not compute the left generalized eigenvectors; */ +/* > = 'V': compute the left generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVR */ +/* > \verbatim */ +/* > JOBVR is CHARACTER*1 */ +/* > = 'N': do not compute the right generalized eigenvectors; */ +/* > = 'V': compute the right generalized eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SENSE */ +/* > \verbatim */ +/* > SENSE is CHARACTER*1 */ +/* > Determines which reciprocal condition numbers are computed. */ +/* > = 'N': none are computed; */ +/* > = 'E': computed for eigenvalues only; */ +/* > = 'V': computed for eigenvectors only; */ +/* > = 'B': computed for eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A, B, VL, and VR. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the matrix A in the pair (A,B). */ +/* > On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* > or both, then A contains the first part of the real Schur */ +/* > form of the "balanced" versions of the input A and B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the matrix B in the pair (A,B). */ +/* > On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' */ +/* > or both, then B contains the second part of the real Schur */ +/* > form of the "balanced" versions of the input A and B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAR */ +/* > \verbatim */ +/* > ALPHAR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAI */ +/* > \verbatim */ +/* > ALPHAI is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will */ +/* > be the generalized eigenvalues. If ALPHAI(j) is zero, then */ +/* > the j-th eigenvalue is real; if positive, then the j-th and */ +/* > (j+1)-st eigenvalues are a complex conjugate pair, with */ +/* > ALPHAI(j+1) negative. */ +/* > */ +/* > Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) */ +/* > may easily over- or underflow, and BETA(j) may even be zero. */ +/* > Thus, the user should avoid naively computing the ratio */ +/* > ALPHA/BETA. However, ALPHAR and ALPHAI will be always less */ +/* > than and usually comparable with norm(A) in magnitude, and */ +/* > BETA always less than and usually comparable with norm(B). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VL */ +/* > \verbatim */ +/* > VL is REAL array, dimension (LDVL,N) */ +/* > If JOBVL = 'V', the left eigenvectors u(j) are stored one */ +/* > after another in the columns of VL, in the same order as */ +/* > their eigenvalues. If the j-th eigenvalue is real, then */ +/* > u(j) = VL(:,j), the j-th column of VL. If the j-th and */ +/* > (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* > u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). */ +/* > Each eigenvector will be scaled so the largest component have */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVL = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the matrix VL. LDVL >= 1, and */ +/* > if JOBVL = 'V', LDVL >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VR */ +/* > \verbatim */ +/* > VR is REAL array, dimension (LDVR,N) */ +/* > If JOBVR = 'V', the right eigenvectors v(j) are stored one */ +/* > after another in the columns of VR, in the same order as */ +/* > their eigenvalues. If the j-th eigenvalue is real, then */ +/* > v(j) = VR(:,j), the j-th column of VR. If the j-th and */ +/* > (j+1)-th eigenvalues form a complex conjugate pair, then */ +/* > v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). */ +/* > Each eigenvector will be scaled so the largest component have */ +/* > abs(real part) + abs(imag. part) = 1. */ +/* > Not referenced if JOBVR = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the matrix VR. LDVR >= 1, and */ +/* > if JOBVR = 'V', LDVR >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI are integer values such that on exit */ +/* > A(i,j) = 0 and B(i,j) = 0 if i > j and */ +/* > j = 1,...,ILO-1 or i = IHI+1,...,N. */ +/* > If BALANC = 'N' or 'S', ILO = 1 and IHI = N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] LSCALE */ +/* > \verbatim */ +/* > LSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the left side of A and B. If PL(j) is the index of the */ +/* > row interchanged with row j, and DL(j) is the scaling */ +/* > factor applied to row j, then */ +/* > LSCALE(j) = PL(j) for j = 1,...,ILO-1 */ +/* > = DL(j) for j = ILO,...,IHI */ +/* > = PL(j) for j = IHI+1,...,N. */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RSCALE */ +/* > \verbatim */ +/* > RSCALE is REAL array, dimension (N) */ +/* > Details of the permutations and scaling factors applied */ +/* > to the right side of A and B. If PR(j) is the index of the */ +/* > column interchanged with column j, and DR(j) is the scaling */ +/* > factor applied to column j, then */ +/* > RSCALE(j) = PR(j) for j = 1,...,ILO-1 */ +/* > = DR(j) for j = ILO,...,IHI */ +/* > = PR(j) for j = IHI+1,...,N */ +/* > The order in which the interchanges are made is N to IHI+1, */ +/* > then 1 to ILO-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ABNRM */ +/* > \verbatim */ +/* > ABNRM is REAL */ +/* > The one-norm of the balanced matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BBNRM */ +/* > \verbatim */ +/* > BBNRM is REAL */ +/* > The one-norm of the balanced matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDE */ +/* > \verbatim */ +/* > RCONDE is REAL array, dimension (N) */ +/* > If SENSE = 'E' or 'B', the reciprocal condition numbers of */ +/* > the eigenvalues, stored in consecutive elements of the array. */ +/* > For a complex conjugate pair of eigenvalues two consecutive */ +/* > elements of RCONDE are set to the same value. Thus RCONDE(j), */ +/* > RCONDV(j), and the j-th columns of VL and VR all correspond */ +/* > to the j-th eigenpair. */ +/* > If SENSE = 'N' or 'V', RCONDE is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCONDV */ +/* > \verbatim */ +/* > RCONDV is REAL array, dimension (N) */ +/* > If SENSE = 'V' or 'B', the estimated reciprocal condition */ +/* > numbers of the eigenvectors, stored in consecutive elements */ +/* > of the array. For a complex eigenvector two consecutive */ +/* > elements of RCONDV are set to the same value. If the */ +/* > eigenvalues cannot be reordered to compute RCONDV(j), */ +/* > RCONDV(j) is set to 0; this can only occur when the true */ +/* > value would be very small anyway. */ +/* > If SENSE = 'N' or 'E', RCONDV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,2*N). */ +/* > If BALANC = 'S' or 'B', or JOBVL = 'V', or JOBVR = 'V', */ +/* > LWORK >= f2cmax(1,6*N). */ +/* > If SENSE = 'E', LWORK >= f2cmax(1,10*N). */ +/* > If SENSE = 'V' or 'B', LWORK >= 2*N*N+8*N+16. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N+6) */ +/* > If SENSE = 'E', IWORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BWORK */ +/* > \verbatim */ +/* > BWORK is LOGICAL array, dimension (N) */ +/* > If SENSE = 'N', BWORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1,...,N: */ +/* > The QZ iteration failed. No eigenvectors have been */ +/* > calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) */ +/* > should be correct for j=INFO+1,...,N. */ +/* > > N: =N+1: other than QZ iteration failed in SHGEQZ. */ +/* > =N+2: error return from STGEVC. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date April 2012 */ + +/* > \ingroup realGEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Balancing a matrix pair (A,B) includes, first, permuting rows and */ +/* > columns to isolate eigenvalues, second, applying diagonal similarity */ +/* > transformation to the rows and columns to make the rows and columns */ +/* > as close in norm as possible. The computed reciprocal condition */ +/* > numbers correspond to the balanced matrix. Permuting rows and columns */ +/* > will not change the condition numbers (in exact arithmetic) but */ +/* > diagonal scaling will. For further explanation of balancing, see */ +/* > section 4.11.1.2 of LAPACK Users' Guide. */ +/* > */ +/* > An approximate error bound on the chordal distance between the i-th */ +/* > computed generalized eigenvalue w and the corresponding exact */ +/* > eigenvalue lambda is */ +/* > */ +/* > chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) */ +/* > */ +/* > An approximate error bound for the angle between the i-th computed */ +/* > eigenvector VL(i) or VR(i) is given by */ +/* > */ +/* > EPS * norm(ABNRM, BBNRM) / DIF(i). */ +/* > */ +/* > For further explanation of the reciprocal condition numbers RCONDE */ +/* > and RCONDV, see section 4.11 of LAPACK User's Guide. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggevx_(char *balanc, char *jobvl, char *jobvr, char * + sense, integer *n, real *a, integer *lda, real *b, integer *ldb, real + *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer *ilo, integer *ihi, real *lscale, real *rscale, + real *abnrm, real *bbnrm, real *rconde, real *rcondv, real *work, + integer *lwork, integer *iwork, logical *bwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, + vr_offset, i__1, i__2; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + logical pair; + real anrm, bnrm; + integer ierr, itau; + real temp; + logical ilvl, ilvr; + integer iwrk, iwrk1, i__, j, m; + extern logical lsame_(char *, char *); + integer icols; + logical noscl; + integer irows, jc; + extern /* Subroutine */ int slabad_(real *, real *); + integer in, mm, jr; + extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, integer * + ), sggbal_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, real *, real *, + integer *); + logical ilascl, ilbscl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), sgghrd_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *, + integer *); + logical ldumma[1]; + char chtemp[1]; + real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern real slamch_(char *); + integer ijobvl; + extern real slange_(char *, integer *, integer *, real *, integer *, real + *); + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *); + integer ijobvr; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + logical wantsb; + extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, + real *, real *, integer *); + real anrmto; + logical wantse; + real bnrmto; + extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , real *, real *, real *, integer *, real *, integer *, real *, + integer *, integer *), stgevc_(char *, + char *, logical *, integer *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *, integer *, integer *, + real *, integer *), stgsna_(char *, char *, + logical *, integer *, real *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, real *, integer *, + integer *, real *, integer *, integer *, integer *); + integer minwrk, maxwrk; + logical wantsn; + real smlnum; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + logical lquery, wantsv; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + real eps; + logical ilv; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* April 2012 */ + + +/* ===================================================================== */ + + +/* Decode the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alphar; + --alphai; + --beta; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --lscale; + --rscale; + --rconde; + --rcondv; + --work; + --iwork; + --bwork; + + /* Function Body */ + if (lsame_(jobvl, "N")) { + ijobvl = 1; + ilvl = FALSE_; + } else if (lsame_(jobvl, "V")) { + ijobvl = 2; + ilvl = TRUE_; + } else { + ijobvl = -1; + ilvl = FALSE_; + } + + if (lsame_(jobvr, "N")) { + ijobvr = 1; + ilvr = FALSE_; + } else if (lsame_(jobvr, "V")) { + ijobvr = 2; + ilvr = TRUE_; + } else { + ijobvr = -1; + ilvr = FALSE_; + } + ilv = ilvl || ilvr; + + noscl = lsame_(balanc, "N") || lsame_(balanc, "P"); + wantsn = lsame_(sense, "N"); + wantse = lsame_(sense, "E"); + wantsv = lsame_(sense, "V"); + wantsb = lsame_(sense, "B"); + +/* Test the input arguments */ + + *info = 0; + lquery = *lwork == -1; + if (! (noscl || lsame_(balanc, "S") || lsame_( + balanc, "B"))) { + *info = -1; + } else if (ijobvl <= 0) { + *info = -2; + } else if (ijobvr <= 0) { + *info = -3; + } else if (! (wantsn || wantse || wantsb || wantsv)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (*ldvl < 1 || ilvl && *ldvl < *n) { + *info = -14; + } else if (*ldvr < 1 || ilvr && *ldvr < *n) { + *info = -16; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning "Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV. The workspace is */ +/* computed assuming ILO = 1 and IHI = N, the worst case.) */ + + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + if (noscl && ! ilv) { + minwrk = *n << 1; + } else { + minwrk = *n * 6; + } + if (wantse) { + minwrk = *n * 10; + } else if (wantsv || wantsb) { + minwrk = (*n << 1) * (*n + 4) + 16; + } + maxwrk = minwrk; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SORMQR", " ", n, & + c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + if (ilvl) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + " ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + maxwrk = f2cmax(i__1,i__2); + } + } + work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -26; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGEVX", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); + ilascl = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + anrmto = smlnum; + ilascl = TRUE_; + } else if (anrm > bignum) { + anrmto = bignum; + ilascl = TRUE_; + } + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & + ierr); + } + +/* Scale B if f2cmax element outside range [SMLNUM,BIGNUM] */ + + bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); + ilbscl = FALSE_; + if (bnrm > 0.f && bnrm < smlnum) { + bnrmto = smlnum; + ilbscl = TRUE_; + } else if (bnrm > bignum) { + bnrmto = bignum; + ilbscl = TRUE_; + } + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & + ierr); + } + +/* Permute and/or balance the matrix pair (A,B) */ +/* (Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) */ + + sggbal_(balanc, n, &a[a_offset], lda, &b[b_offset], ldb, ilo, ihi, & + lscale[1], &rscale[1], &work[1], &ierr); + +/* Compute ABNRM and BBNRM */ + + *abnrm = slange_("1", n, n, &a[a_offset], lda, &work[1]); + if (ilascl) { + work[1] = *abnrm; + slascl_("G", &c__0, &c__0, &anrmto, &anrm, &c__1, &c__1, &work[1], & + c__1, &ierr); + *abnrm = work[1]; + } + + *bbnrm = slange_("1", n, n, &b[b_offset], ldb, &work[1]); + if (ilbscl) { + work[1] = *bbnrm; + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, &c__1, &c__1, &work[1], & + c__1, &ierr); + *bbnrm = work[1]; + } + +/* Reduce B to triangular form (QR decomposition of B) */ +/* (Workspace: need N, prefer N*NB ) */ + + irows = *ihi + 1 - *ilo; + if (ilv || ! wantsn) { + icols = *n + 1 - *ilo; + } else { + icols = irows; + } + itau = 1; + iwrk = itau + irows; + i__1 = *lwork + 1 - iwrk; + sgeqrf_(&irows, &icols, &b[*ilo + *ilo * b_dim1], ldb, &work[itau], &work[ + iwrk], &i__1, &ierr); + +/* Apply the orthogonal transformation to A */ +/* (Workspace: need N, prefer N*NB) */ + + i__1 = *lwork + 1 - iwrk; + sormqr_("L", "T", &irows, &icols, &irows, &b[*ilo + *ilo * b_dim1], ldb, & + work[itau], &a[*ilo + *ilo * a_dim1], lda, &work[iwrk], &i__1, & + ierr); + +/* Initialize VL and/or VR */ +/* (Workspace: need N, prefer N*NB) */ + + if (ilvl) { + slaset_("Full", n, n, &c_b57, &c_b58, &vl[vl_offset], ldvl) + ; + if (irows > 1) { + i__1 = irows - 1; + i__2 = irows - 1; + slacpy_("L", &i__1, &i__2, &b[*ilo + 1 + *ilo * b_dim1], ldb, &vl[ + *ilo + 1 + *ilo * vl_dim1], ldvl); + } + i__1 = *lwork + 1 - iwrk; + sorgqr_(&irows, &irows, &irows, &vl[*ilo + *ilo * vl_dim1], ldvl, & + work[itau], &work[iwrk], &i__1, &ierr); + } + + if (ilvr) { + slaset_("Full", n, n, &c_b57, &c_b58, &vr[vr_offset], ldvr) + ; + } + +/* Reduce to generalized Hessenberg form */ +/* (Workspace: none needed) */ + + if (ilv || ! wantsn) { + +/* Eigenvectors requested -- work on whole matrix. */ + + sgghrd_(jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); + } else { + sgghrd_("N", "N", &irows, &c__1, &irows, &a[*ilo + *ilo * a_dim1], + lda, &b[*ilo + *ilo * b_dim1], ldb, &vl[vl_offset], ldvl, &vr[ + vr_offset], ldvr, &ierr); + } + +/* Perform QZ algorithm (Compute eigenvalues, and optionally, the */ +/* Schur forms and Schur vectors) */ +/* (Workspace: need N) */ + + if (ilv || ! wantsn) { + *(unsigned char *)chtemp = 'S'; + } else { + *(unsigned char *)chtemp = 'E'; + } + + shgeqz_(chtemp, jobvl, jobvr, n, ilo, ihi, &a[a_offset], lda, &b[b_offset] + , ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], ldvl, & + vr[vr_offset], ldvr, &work[1], lwork, &ierr); + if (ierr != 0) { + if (ierr > 0 && ierr <= *n) { + *info = ierr; + } else if (ierr > *n && ierr <= *n << 1) { + *info = ierr - *n; + } else { + *info = *n + 1; + } + goto L130; + } + +/* Compute Eigenvectors and estimate condition numbers if desired */ +/* (Workspace: STGEVC: need 6*N */ +/* STGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', */ +/* need N otherwise ) */ + + if (ilv || ! wantsn) { + if (ilv) { + if (ilvl) { + if (ilvr) { + *(unsigned char *)chtemp = 'B'; + } else { + *(unsigned char *)chtemp = 'L'; + } + } else { + *(unsigned char *)chtemp = 'R'; + } + + stgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], + ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, & + work[1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L130; + } + } + + if (! wantsn) { + +/* compute eigenvectors (STGEVC) and estimate condition */ +/* numbers (STGSNA). Note that the definition of the condition */ +/* number is not invariant under transformation (u,v) to */ +/* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized */ +/* Schur form (S,T), Q and Z are orthogonal matrices. In order */ +/* to avoid using extra 2*N*N workspace, we have to recalculate */ +/* eigenvectors and estimate one condition numbers at a time. */ + + pair = FALSE_; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + + if (pair) { + pair = FALSE_; + goto L20; + } + mm = 1; + if (i__ < *n) { + if (a[i__ + 1 + i__ * a_dim1] != 0.f) { + pair = TRUE_; + mm = 2; + } + } + + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + bwork[j] = FALSE_; +/* L10: */ + } + if (mm == 1) { + bwork[i__] = TRUE_; + } else if (mm == 2) { + bwork[i__] = TRUE_; + bwork[i__ + 1] = TRUE_; + } + + iwrk = mm * *n + 1; + iwrk1 = iwrk + mm * *n; + +/* Compute a pair of left and right eigenvectors. */ +/* (compute workspace: need up to 4*N + 6*N) */ + + if (wantse || wantsb) { + stgevc_("B", "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, &mm, + &m, &work[iwrk1], &ierr); + if (ierr != 0) { + *info = *n + 2; + goto L130; + } + } + + i__2 = *lwork - iwrk1 + 1; + stgsna_(sense, "S", &bwork[1], n, &a[a_offset], lda, &b[ + b_offset], ldb, &work[1], n, &work[iwrk], n, &rconde[ + i__], &rcondv[i__], &mm, &m, &work[iwrk1], &i__2, & + iwork[1], &ierr); + +L20: + ; + } + } + } + +/* Undo balancing on VL and VR and normalization */ +/* (Workspace: none needed) */ + + if (ilvl) { + sggbak_(balanc, "L", n, ilo, ihi, &lscale[1], &rscale[1], n, &vl[ + vl_offset], ldvl, &ierr); + + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.f) { + goto L70; + } + temp = 0.f; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__2 = temp, r__3 = (r__1 = vl[jr + jc * vl_dim1], abs( + r__1)); + temp = f2cmax(r__2,r__3); +/* L30: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__3 = temp, r__4 = (r__1 = vl[jr + jc * vl_dim1], abs( + r__1)) + (r__2 = vl[jr + (jc + 1) * vl_dim1], abs( + r__2)); + temp = f2cmax(r__3,r__4); +/* L40: */ + } + } + if (temp < smlnum) { + goto L70; + } + temp = 1.f / temp; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; +/* L50: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vl[jr + jc * vl_dim1] *= temp; + vl[jr + (jc + 1) * vl_dim1] *= temp; +/* L60: */ + } + } +L70: + ; + } + } + if (ilvr) { + sggbak_(balanc, "R", n, ilo, ihi, &lscale[1], &rscale[1], n, &vr[ + vr_offset], ldvr, &ierr); + i__1 = *n; + for (jc = 1; jc <= i__1; ++jc) { + if (alphai[jc] < 0.f) { + goto L120; + } + temp = 0.f; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__2 = temp, r__3 = (r__1 = vr[jr + jc * vr_dim1], abs( + r__1)); + temp = f2cmax(r__2,r__3); +/* L80: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { +/* Computing MAX */ + r__3 = temp, r__4 = (r__1 = vr[jr + jc * vr_dim1], abs( + r__1)) + (r__2 = vr[jr + (jc + 1) * vr_dim1], abs( + r__2)); + temp = f2cmax(r__3,r__4); +/* L90: */ + } + } + if (temp < smlnum) { + goto L120; + } + temp = 1.f / temp; + if (alphai[jc] == 0.f) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; +/* L100: */ + } + } else { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + vr[jr + jc * vr_dim1] *= temp; + vr[jr + (jc + 1) * vr_dim1] *= temp; +/* L110: */ + } + } +L120: + ; + } + } + +/* Undo scaling if necessary */ + +L130: + + if (ilascl) { + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & + ierr); + slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & + ierr); + } + + if (ilbscl) { + slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & + ierr); + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGGEVX */ + +} /* sggevx_ */ + diff --git a/lapack-netlib/SRC/sggglm.c b/lapack-netlib/SRC/sggglm.c new file mode 100644 index 000000000..d2a47c68b --- /dev/null +++ b/lapack-netlib/SRC/sggglm.c @@ -0,0 +1,787 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGGLM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGGLM + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* REAL A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), */ +/* $ X( * ), Y( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGGLM solves a general Gauss-Markov linear model (GLM) problem: */ +/* > */ +/* > minimize || y ||_2 subject to d = A*x + B*y */ +/* > x */ +/* > */ +/* > where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */ +/* > given N-vector. It is assumed that M <= N <= M+P, and */ +/* > */ +/* > rank(A) = M and rank( A B ) = N. */ +/* > */ +/* > Under these assumptions, the constrained equation is always */ +/* > consistent, and there is a unique solution x and a minimal 2-norm */ +/* > solution y, which is obtained using a generalized QR factorization */ +/* > of the matrices (A, B) given by */ +/* > */ +/* > A = Q*(R), B = Q*T*Z. */ +/* > (0) */ +/* > */ +/* > In particular, if matrix B is square nonsingular, then the problem */ +/* > GLM is equivalent to the following weighted linear least squares */ +/* > problem */ +/* > */ +/* > minimize || inv(B)*(d-A*x) ||_2 */ +/* > x */ +/* > */ +/* > where inv(B) denotes the inverse of B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix A. 0 <= M <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of columns of the matrix B. P >= N-M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,M) */ +/* > On entry, the N-by-M matrix A. */ +/* > On exit, the upper triangular part of the array A contains */ +/* > the M-by-M upper triangular matrix R. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,P) */ +/* > On entry, the N-by-P matrix B. */ +/* > On exit, if N <= P, the upper triangle of the subarray */ +/* > B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* > if N > P, the elements on and above the (N-P)th subdiagonal */ +/* > contain the N-by-P upper trapezoidal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > On entry, D is the left hand side of the GLM equation. */ +/* > On exit, D is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Y */ +/* > \verbatim */ +/* > Y is REAL array, dimension (P) */ +/* > */ +/* > On exit, X and Y are the solutions of the GLM problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N+M+P). */ +/* > For optimum performance, LWORK >= M+f2cmin(N,P)+f2cmax(N,P)*NB, */ +/* > where NB is an upper bound for the optimal blocksizes for */ +/* > SGEQRF, SGERQF, SORMQR and SORMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1: the upper triangular factor R associated with A in the */ +/* > generalized QR factorization of the pair (A, B) is */ +/* > singular, so that rank(A) < M; the least squares */ +/* > solution could not be computed. */ +/* > = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal */ +/* > factor T associated with B in the generalized QR */ +/* > factorization of the pair (A, B) is singular, so that */ +/* > rank( A B ) < N; the least squares solution could not */ +/* > be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realOTHEReigen */ + +/* ===================================================================== */ +/* Subroutine */ int sggglm_(integer *n, integer *m, integer *p, real *a, + integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer lopt, i__; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *); + integer nb, np; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, real *, real *, integer * + , integer *); + integer lwkmin, nb1, nb2, nb3, nb4, lwkopt; + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *), sormrq_(char *, char *, + integer *, integer *, integer *, real *, integer *, real *, real * + , integer *, real *, integer *, integer *), + strtrs_(char *, char *, char *, integer *, integer *, real *, + integer *, real *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* =================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --d__; + --x; + --y; + --work; + + /* Function Body */ + *info = 0; + np = f2cmin(*n,*p); + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0 || *m > *n) { + *info = -2; + } else if (*p < 0 || *p < *n - *m) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "SGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "SORMQR", " ", n, m, p, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb4 = ilaenv_(&c__1, "SORMRQ", " ", n, m, p, &c_n1, (ftnlen)6, ( + ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *m + np + f2cmax(*n,*p) * nb; + } + work[1] = (real) lwkopt; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGGLM", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = 0.f; + } + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; + } + return 0; + } + +/* Compute the GQR factorization of matrices A and B: */ + +/* Q**T*A = ( R11 ) M, Q**T*B*Z**T = ( T11 T12 ) M */ +/* ( 0 ) N-M ( 0 T22 ) N-M */ +/* M M+P-N N-M */ + +/* where R11 and T22 are upper triangular, and Q and Z are */ +/* orthogonal. */ + + i__1 = *lwork - *m - np; + sggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m + + 1], &work[*m + np + 1], &i__1, info); + lopt = work[*m + np + 1]; + +/* Update left-hand-side vector d = Q**T*d = ( d1 ) M */ +/* ( d2 ) N-M */ + + i__1 = f2cmax(1,*n); + i__2 = *lwork - *m - np; + sormqr_("Left", "Transpose", n, &c__1, m, &a[a_offset], lda, &work[1], & + d__[1], &i__1, &work[*m + np + 1], &i__2, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*m + np + 1]; + lopt = f2cmax(i__1,i__2); + +/* Solve T22*y2 = d2 for y2 */ + + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + strtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 + + (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, + info); + + if (*info > 0) { + *info = 1; + return 0; + } + + i__1 = *n - *m; + scopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1); + } + +/* Set y1 = 0 */ + + i__1 = *m + *p - *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + +/* Update d1 = d1 - T12*y2 */ + + i__1 = *n - *m; + sgemv_("No transpose", m, &i__1, &c_b32, &b[(*m + *p - *n + 1) * b_dim1 + + 1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b34, &d__[1], &c__1); + +/* Solve triangular system: R11*x = d1 */ + + if (*m > 0) { + strtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], + lda, &d__[1], m, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Copy D to X */ + + scopy_(m, &d__[1], &c__1, &x[1], &c__1); + } + +/* Backward transformation y = Z**T *y */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n - *p + 1; + i__3 = f2cmax(1,*p); + i__4 = *lwork - *m - np; + sormrq_("Left", "Transpose", p, &c__1, &np, &b[f2cmax(i__1,i__2) + b_dim1], + ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &i__4, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*m + np + 1]; + work[1] = (real) (*m + np + f2cmax(i__1,i__2)); + + return 0; + +/* End of SGGGLM */ + +} /* sggglm_ */ + diff --git a/lapack-netlib/SRC/sgghd3.c b/lapack-netlib/SRC/sgghd3.c new file mode 100644 index 000000000..78428304a --- /dev/null +++ b/lapack-netlib/SRC/sgghd3.c @@ -0,0 +1,1452 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGHD3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGHRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */ +/* LDQ, Z, LDZ, WORK, LWORK, INFO ) */ + +/* CHARACTER COMPQ, COMPZ */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK */ +/* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ Z( LDZ, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGHD3 reduces a pair of real matrices (A,B) to generalized upper */ +/* > Hessenberg form using orthogonal transformations, where A is a */ +/* > general matrix and B is upper triangular. The form of the */ +/* > generalized eigenvalue problem is */ +/* > A*x = lambda*B*x, */ +/* > and B is typically made upper triangular by computing its QR */ +/* > factorization and moving the orthogonal matrix Q to the left side */ +/* > of the equation. */ +/* > */ +/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */ +/* > Q**T*A*Z = H */ +/* > and transforms B to another upper triangular matrix T: */ +/* > Q**T*B*Z = T */ +/* > in order to reduce the problem to its standard form */ +/* > H*y = lambda*T*y */ +/* > where y = Z**T*x. */ +/* > */ +/* > The orthogonal matrices Q and Z are determined as products of Givens */ +/* > rotations. They may either be formed explicitly, or they may be */ +/* > postmultiplied into input matrices Q1 and Z1, so that */ +/* > */ +/* > Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T */ +/* > */ +/* > Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T */ +/* > */ +/* > If Q1 is the orthogonal matrix from the QR factorization of B in the */ +/* > original equation A*x = lambda*B*x, then SGGHD3 reduces the original */ +/* > problem to generalized Hessenberg form. */ +/* > */ +/* > This is a blocked variant of SGGHRD, using matrix-matrix */ +/* > multiplications for parts of the computation to enhance performance. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > = 'N': do not compute Q; */ +/* > = 'I': Q is initialized to the unit matrix, and the */ +/* > orthogonal matrix Q is returned; */ +/* > = 'V': Q must contain an orthogonal matrix Q1 on entry, */ +/* > and the product Q1*Q is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': do not compute Z; */ +/* > = 'I': Z is initialized to the unit matrix, and the */ +/* > orthogonal matrix Z is returned; */ +/* > = 'V': Z must contain an orthogonal matrix Z1 on entry, */ +/* > and the product Z1*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > ILO and IHI mark the rows and columns of A which are to be */ +/* > reduced. It is assumed that A is already upper triangular */ +/* > in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ +/* > normally set by a previous call to SGGBAL; otherwise they */ +/* > should be set to 1 and N respectively. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > rest is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the N-by-N upper triangular matrix B. */ +/* > On exit, the upper triangular matrix T = Q**T B Z. The */ +/* > elements below the diagonal are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is REAL array, dimension (LDQ, N) */ +/* > On entry, if COMPQ = 'V', the orthogonal matrix Q1, */ +/* > typically from the QR factorization of B. */ +/* > On exit, if COMPQ='I', the orthogonal matrix Q, and if */ +/* > COMPQ = 'V', the product Q1*Q. */ +/* > Not referenced if COMPQ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is REAL array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', the orthogonal matrix Z1. */ +/* > On exit, if COMPZ='I', the orthogonal matrix Z, and if */ +/* > COMPZ = 'V', the product Z1*Z. */ +/* > Not referenced if COMPZ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. */ +/* > LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= 1. */ +/* > For optimum performance LWORK >= 6*N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date January 2015 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This routine reduces A to Hessenberg form and maintains B in */ +/* > using a blocked variant of Moler and Stewart's original algorithm, */ +/* > as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti */ +/* > (BIT 2008). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgghd3_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real + *q, integer *ldq, real *z__, integer *ldz, real *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; + real r__1; + + /* Local variables */ + logical blk22; + integer cola, jcol, ierr; + real temp; + integer jrow, topq, ppwo; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + real temp1, temp2, temp3, c__; + integer kacc22, i__, j, k; + real s; + extern logical lsame_(char *, char *); + integer nbmin; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), sgemv_(char *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + integer nblst; + logical initq; + real c1, c2; + extern /* Subroutine */ int sorm22_(char *, char *, integer *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , integer *, integer *); + logical wantq; + integer j0; + logical initz, wantz; + real s1, s2; + extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, + real *, integer *, real *, integer *); + char compq2[1], compz2[1]; + integer nb, jj, nh, nx, pw; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, real *, integer * + , real *, integer *, integer *), slaset_(char *, + integer *, integer *, real *, real *, real *, integer *), + slartg_(real *, real *, real *, real *, real *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *); + integer lwkopt; + logical lquery; + integer nnb, len, top, ppw, n2nb; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* January 2015 */ + + + +/* ===================================================================== */ + + +/* Decode and test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen) + 1); +/* Computing MAX */ + i__1 = *n * 6 * nb; + lwkopt = f2cmax(i__1,1); + work[1] = (real) lwkopt; + initq = lsame_(compq, "I"); + wantq = initq || lsame_(compq, "V"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + lquery = *lwork == -1; + + if (! lsame_(compq, "N") && ! wantq) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (wantq && *ldq < *n || *ldq < 1) { + *info = -11; + } else if (wantz && *ldz < *n || *ldz < 1) { + *info = -13; + } else if (*lwork < 1 && ! lquery) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGHD3", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Q and Z if desired. */ + + if (initq) { + slaset_("All", n, n, &c_b14, &c_b15, &q[q_offset], ldq); + } + if (initz) { + slaset_("All", n, n, &c_b14, &c_b15, &z__[z_offset], ldz); + } + +/* Zero out lower triangle of B. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("Lower", &i__1, &i__2, &c_b14, &c_b14, &b[b_dim1 + 2], ldb); + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.f; + return 0; + } + +/* Determine the blocksize. */ + + nbmin = ilaenv_(&c__2, "SGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb > 1 && nb < nh) { + +/* Determine when to use unblocked instead of blocked code. */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGGHD3", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = f2cmax(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + if (*lwork < lwkopt) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code. */ + +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGGHD3", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = f2cmax(i__1,i__2); + if (*lwork >= *n * 6 * nbmin) { + nb = *lwork / (*n * 6); + } else { + nb = 1; + } + } + } + } + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + jcol = *ilo; + + } else { + +/* Use blocked code */ + + kacc22 = ilaenv_(&c__16, "SGGHD3", " ", n, ilo, ihi, &c_n1, (ftnlen)6, + (ftnlen)1); + blk22 = kacc22 == 2; + i__1 = *ihi - 2; + i__2 = nb; + for (jcol = *ilo; i__2 < 0 ? jcol >= i__1 : jcol <= i__1; jcol += + i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - jcol - 1; + nnb = f2cmin(i__3,i__4); + +/* Initialize small orthogonal factors that will hold the */ +/* accumulated Givens rotations in workspace. */ +/* N2NB denotes the number of 2*NNB-by-2*NNB factors */ +/* NBLST denotes the (possibly smaller) order of the last */ +/* factor. */ + + n2nb = (*ihi - jcol - 1) / nnb - 1; + nblst = *ihi - jcol - n2nb * nnb; + slaset_("All", &nblst, &nblst, &c_b14, &c_b15, &work[1], &nblst); + pw = nblst * nblst + 1; + i__3 = n2nb; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = nnb << 1; + i__5 = nnb << 1; + i__6 = nnb << 1; + slaset_("All", &i__4, &i__5, &c_b14, &c_b15, &work[pw], &i__6); + pw += (nnb << 2) * nnb; + } + +/* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. */ + + i__3 = jcol + nnb - 1; + for (j = jcol; j <= i__3; ++j) { + +/* Reduce Jth column of A. Store cosines and sines in Jth */ +/* column of A and B, respectively. */ + + i__4 = j + 2; + for (i__ = *ihi; i__ >= i__4; --i__) { + temp = a[i__ - 1 + j * a_dim1]; + slartg_(&temp, &a[i__ + j * a_dim1], &c__, &s, &a[i__ - 1 + + j * a_dim1]); + a[i__ + j * a_dim1] = c__; + b[i__ + j * b_dim1] = s; + } + +/* Accumulate Givens rotations into workspace array. */ + + ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1; + len = j + 2 - jcol; + jrow = j + n2nb * nnb + 2; + i__4 = jrow; + for (i__ = *ihi; i__ >= i__4; --i__) { + c__ = a[i__ + j * a_dim1]; + s = b[i__ + j * b_dim1]; + i__5 = ppw + len - 1; + for (jj = ppw; jj <= i__5; ++jj) { + temp = work[jj + nblst]; + work[jj + nblst] = c__ * temp - s * work[jj]; + work[jj] = s * temp + c__ * work[jj]; + } + ++len; + ppw = ppw - nblst - 1; + } + + ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + nnb; + j0 = jrow - nnb; + i__4 = j + 2; + i__5 = -nnb; + for (jrow = j0; i__5 < 0 ? jrow >= i__4 : jrow <= i__4; jrow + += i__5) { + ppw = ppwo; + len = j + 2 - jcol; + i__6 = jrow; + for (i__ = jrow + nnb - 1; i__ >= i__6; --i__) { + c__ = a[i__ + j * a_dim1]; + s = b[i__ + j * b_dim1]; + i__7 = ppw + len - 1; + for (jj = ppw; jj <= i__7; ++jj) { + temp = work[jj + (nnb << 1)]; + work[jj + (nnb << 1)] = c__ * temp - s * work[jj]; + work[jj] = s * temp + c__ * work[jj]; + } + ++len; + ppw = ppw - (nnb << 1) - 1; + } + ppwo += (nnb << 2) * nnb; + } + +/* TOP denotes the number of top rows in A and B that will */ +/* not be updated during the next steps. */ + + if (jcol <= 2) { + top = 0; + } else { + top = jcol; + } + +/* Propagate transformations through B and replace stored */ +/* left sines/cosines by right sines/cosines. */ + + i__5 = j + 1; + for (jj = *n; jj >= i__5; --jj) { + +/* Update JJth column of B. */ + +/* Computing MIN */ + i__4 = jj + 1; + i__6 = j + 2; + for (i__ = f2cmin(i__4,*ihi); i__ >= i__6; --i__) { + c__ = a[i__ + j * a_dim1]; + s = b[i__ + j * b_dim1]; + temp = b[i__ + jj * b_dim1]; + b[i__ + jj * b_dim1] = c__ * temp - s * b[i__ - 1 + + jj * b_dim1]; + b[i__ - 1 + jj * b_dim1] = s * temp + c__ * b[i__ - 1 + + jj * b_dim1]; + } + +/* Annihilate B( JJ+1, JJ ). */ + + if (jj < *ihi) { + temp = b[jj + 1 + (jj + 1) * b_dim1]; + slartg_(&temp, &b[jj + 1 + jj * b_dim1], &c__, &s, &b[ + jj + 1 + (jj + 1) * b_dim1]); + b[jj + 1 + jj * b_dim1] = 0.f; + i__6 = jj - top; + srot_(&i__6, &b[top + 1 + (jj + 1) * b_dim1], &c__1, & + b[top + 1 + jj * b_dim1], &c__1, &c__, &s); + a[jj + 1 + j * a_dim1] = c__; + b[jj + 1 + j * b_dim1] = -s; + } + } + +/* Update A by transformations from right. */ +/* Explicit loop unrolling provides better performance */ +/* compared to SLASR. */ +/* CALL SLASR( 'Right', 'Variable', 'Backward', IHI-TOP, */ +/* $ IHI-J, A( J+2, J ), B( J+2, J ), */ +/* $ A( TOP+1, J+1 ), LDA ) */ + + jj = (*ihi - j - 1) % 3; + i__5 = jj + 1; + for (i__ = *ihi - j - 3; i__ >= i__5; i__ += -3) { + c__ = a[j + 1 + i__ + j * a_dim1]; + s = -b[j + 1 + i__ + j * b_dim1]; + c1 = a[j + 2 + i__ + j * a_dim1]; + s1 = -b[j + 2 + i__ + j * b_dim1]; + c2 = a[j + 3 + i__ + j * a_dim1]; + s2 = -b[j + 3 + i__ + j * b_dim1]; + + i__6 = *ihi; + for (k = top + 1; k <= i__6; ++k) { + temp = a[k + (j + i__) * a_dim1]; + temp1 = a[k + (j + i__ + 1) * a_dim1]; + temp2 = a[k + (j + i__ + 2) * a_dim1]; + temp3 = a[k + (j + i__ + 3) * a_dim1]; + a[k + (j + i__ + 3) * a_dim1] = c2 * temp3 + s2 * + temp2; + temp2 = -s2 * temp3 + c2 * temp2; + a[k + (j + i__ + 2) * a_dim1] = c1 * temp2 + s1 * + temp1; + temp1 = -s1 * temp2 + c1 * temp1; + a[k + (j + i__ + 1) * a_dim1] = c__ * temp1 + s * + temp; + a[k + (j + i__) * a_dim1] = -s * temp1 + c__ * temp; + } + } + + if (jj > 0) { + for (i__ = jj; i__ >= 1; --i__) { + i__5 = *ihi - top; + r__1 = -b[j + 1 + i__ + j * b_dim1]; + srot_(&i__5, &a[top + 1 + (j + i__ + 1) * a_dim1], & + c__1, &a[top + 1 + (j + i__) * a_dim1], &c__1, + &a[j + 1 + i__ + j * a_dim1], &r__1); + } + } + +/* Update (J+1)th column of A by transformations from left. */ + + if (j < jcol + nnb - 1) { + len = j + 1 - jcol; + +/* Multiply with the trailing accumulated orthogonal */ +/* matrix, which takes the form */ + +/* [ U11 U12 ] */ +/* U = [ ], */ +/* [ U21 U22 ] */ + +/* where U21 is a LEN-by-LEN matrix and U12 is lower */ +/* triangular. */ + + jrow = *ihi - nblst + 1; + sgemv_("Transpose", &nblst, &len, &c_b15, &work[1], & + nblst, &a[jrow + (j + 1) * a_dim1], &c__1, &c_b14, + &work[pw], &c__1); + ppw = pw + len; + i__5 = jrow + nblst - len - 1; + for (i__ = jrow; i__ <= i__5; ++i__) { + work[ppw] = a[i__ + (j + 1) * a_dim1]; + ++ppw; + } + i__5 = nblst - len; + strmv_("Lower", "Transpose", "Non-unit", &i__5, &work[len + * nblst + 1], &nblst, &work[pw + len], &c__1); + i__5 = nblst - len; + sgemv_("Transpose", &len, &i__5, &c_b15, &work[(len + 1) * + nblst - len + 1], &nblst, &a[jrow + nblst - len + + (j + 1) * a_dim1], &c__1, &c_b15, &work[pw + + len], &c__1); + ppw = pw; + i__5 = jrow + nblst - 1; + for (i__ = jrow; i__ <= i__5; ++i__) { + a[i__ + (j + 1) * a_dim1] = work[ppw]; + ++ppw; + } + +/* Multiply with the other accumulated orthogonal */ +/* matrices, which take the form */ + +/* [ U11 U12 0 ] */ +/* [ ] */ +/* U = [ U21 U22 0 ], */ +/* [ ] */ +/* [ 0 0 I ] */ + +/* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity */ +/* matrix, U21 is a LEN-by-LEN upper triangular matrix */ +/* and U12 is an NNB-by-NNB lower triangular matrix. */ + + ppwo = nblst * nblst + 1; + j0 = jrow - nnb; + i__5 = jcol + 1; + i__6 = -nnb; + for (jrow = j0; i__6 < 0 ? jrow >= i__5 : jrow <= i__5; + jrow += i__6) { + ppw = pw + len; + i__4 = jrow + nnb - 1; + for (i__ = jrow; i__ <= i__4; ++i__) { + work[ppw] = a[i__ + (j + 1) * a_dim1]; + ++ppw; + } + ppw = pw; + i__4 = jrow + nnb + len - 1; + for (i__ = jrow + nnb; i__ <= i__4; ++i__) { + work[ppw] = a[i__ + (j + 1) * a_dim1]; + ++ppw; + } + i__4 = nnb << 1; + strmv_("Upper", "Transpose", "Non-unit", &len, &work[ + ppwo + nnb], &i__4, &work[pw], &c__1); + i__4 = nnb << 1; + strmv_("Lower", "Transpose", "Non-unit", &nnb, &work[ + ppwo + (len << 1) * nnb], &i__4, &work[pw + + len], &c__1); + i__4 = nnb << 1; + sgemv_("Transpose", &nnb, &len, &c_b15, &work[ppwo], & + i__4, &a[jrow + (j + 1) * a_dim1], &c__1, & + c_b15, &work[pw], &c__1); + i__4 = nnb << 1; + sgemv_("Transpose", &len, &nnb, &c_b15, &work[ppwo + ( + len << 1) * nnb + nnb], &i__4, &a[jrow + nnb + + (j + 1) * a_dim1], &c__1, &c_b15, &work[pw + + len], &c__1); + ppw = pw; + i__4 = jrow + len + nnb - 1; + for (i__ = jrow; i__ <= i__4; ++i__) { + a[i__ + (j + 1) * a_dim1] = work[ppw]; + ++ppw; + } + ppwo += (nnb << 2) * nnb; + } + } + } + +/* Apply accumulated orthogonal matrices to A. */ + + cola = *n - jcol - nnb + 1; + j = *ihi - nblst + 1; + sgemm_("Transpose", "No Transpose", &nblst, &cola, &nblst, &c_b15, + &work[1], &nblst, &a[j + (jcol + nnb) * a_dim1], lda, & + c_b14, &work[pw], &nblst); + slacpy_("All", &nblst, &cola, &work[pw], &nblst, &a[j + (jcol + + nnb) * a_dim1], lda); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__6 = -nnb; + for (j = j0; i__6 < 0 ? j >= i__3 : j <= i__3; j += i__6) { + if (blk22) { + +/* Exploit the structure of */ + +/* [ U11 U12 ] */ +/* U = [ ] */ +/* [ U21 U22 ], */ + +/* where all blocks are NNB-by-NNB, U21 is upper */ +/* triangular and U12 is lower triangular. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + sorm22_("Left", "Transpose", &i__5, &cola, &nnb, &nnb, & + work[ppwo], &i__4, &a[j + (jcol + nnb) * a_dim1], + lda, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + i__8 = nnb << 1; + sgemm_("Transpose", "No Transpose", &i__5, &cola, &i__4, & + c_b15, &work[ppwo], &i__7, &a[j + (jcol + nnb) * + a_dim1], lda, &c_b14, &work[pw], &i__8); + i__5 = nnb << 1; + i__4 = nnb << 1; + slacpy_("All", &i__5, &cola, &work[pw], &i__4, &a[j + ( + jcol + nnb) * a_dim1], lda); + } + ppwo += (nnb << 2) * nnb; + } + +/* Apply accumulated orthogonal matrices to Q. */ + + if (wantq) { + j = *ihi - nblst + 1; + if (initq) { +/* Computing MAX */ + i__6 = 2, i__3 = j - jcol + 1; + topq = f2cmax(i__6,i__3); + nh = *ihi - topq + 1; + } else { + topq = 1; + nh = *n; + } + sgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, & + c_b15, &q[topq + j * q_dim1], ldq, &work[1], &nblst, & + c_b14, &work[pw], &nh); + slacpy_("All", &nh, &nblst, &work[pw], &nh, &q[topq + j * + q_dim1], ldq); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__6 = jcol + 1; + i__3 = -nnb; + for (j = j0; i__3 < 0 ? j >= i__6 : j <= i__6; j += i__3) { + if (initq) { +/* Computing MAX */ + i__5 = 2, i__4 = j - jcol + 1; + topq = f2cmax(i__5,i__4); + nh = *ihi - topq + 1; + } + if (blk22) { + +/* Exploit the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + sorm22_("Right", "No Transpose", &nh, &i__5, &nnb, & + nnb, &work[ppwo], &i__4, &q[topq + j * q_dim1] + , ldq, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__5 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + sgemm_("No Transpose", "No Transpose", &nh, &i__5, & + i__4, &c_b15, &q[topq + j * q_dim1], ldq, & + work[ppwo], &i__7, &c_b14, &work[pw], &nh); + i__5 = nnb << 1; + slacpy_("All", &nh, &i__5, &work[pw], &nh, &q[topq + + j * q_dim1], ldq); + } + ppwo += (nnb << 2) * nnb; + } + } + +/* Accumulate right Givens rotations if required. */ + + if (wantz || top > 0) { + +/* Initialize small orthogonal factors that will hold the */ +/* accumulated Givens rotations in workspace. */ + + slaset_("All", &nblst, &nblst, &c_b14, &c_b15, &work[1], & + nblst); + pw = nblst * nblst + 1; + i__3 = n2nb; + for (i__ = 1; i__ <= i__3; ++i__) { + i__6 = nnb << 1; + i__5 = nnb << 1; + i__4 = nnb << 1; + slaset_("All", &i__6, &i__5, &c_b14, &c_b15, &work[pw], & + i__4); + pw += (nnb << 2) * nnb; + } + +/* Accumulate Givens rotations into workspace array. */ + + i__3 = jcol + nnb - 1; + for (j = jcol; j <= i__3; ++j) { + ppw = (nblst + 1) * (nblst - 2) - j + jcol + 1; + len = j + 2 - jcol; + jrow = j + n2nb * nnb + 2; + i__6 = jrow; + for (i__ = *ihi; i__ >= i__6; --i__) { + c__ = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = 0.f; + s = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = 0.f; + i__5 = ppw + len - 1; + for (jj = ppw; jj <= i__5; ++jj) { + temp = work[jj + nblst]; + work[jj + nblst] = c__ * temp - s * work[jj]; + work[jj] = s * temp + c__ * work[jj]; + } + ++len; + ppw = ppw - nblst - 1; + } + + ppwo = nblst * nblst + (nnb + j - jcol - 1 << 1) * nnb + + nnb; + j0 = jrow - nnb; + i__6 = j + 2; + i__5 = -nnb; + for (jrow = j0; i__5 < 0 ? jrow >= i__6 : jrow <= i__6; + jrow += i__5) { + ppw = ppwo; + len = j + 2 - jcol; + i__4 = jrow; + for (i__ = jrow + nnb - 1; i__ >= i__4; --i__) { + c__ = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = 0.f; + s = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = 0.f; + i__7 = ppw + len - 1; + for (jj = ppw; jj <= i__7; ++jj) { + temp = work[jj + (nnb << 1)]; + work[jj + (nnb << 1)] = c__ * temp - s * work[ + jj]; + work[jj] = s * temp + c__ * work[jj]; + } + ++len; + ppw = ppw - (nnb << 1) - 1; + } + ppwo += (nnb << 2) * nnb; + } + } + } else { + + i__3 = *ihi - jcol - 1; + slaset_("Lower", &i__3, &nnb, &c_b14, &c_b14, &a[jcol + 2 + + jcol * a_dim1], lda); + i__3 = *ihi - jcol - 1; + slaset_("Lower", &i__3, &nnb, &c_b14, &c_b14, &b[jcol + 2 + + jcol * b_dim1], ldb); + } + +/* Apply accumulated orthogonal matrices to A and B. */ + + if (top > 0) { + j = *ihi - nblst + 1; + sgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, & + c_b15, &a[j * a_dim1 + 1], lda, &work[1], &nblst, & + c_b14, &work[pw], &top); + slacpy_("All", &top, &nblst, &work[pw], &top, &a[j * a_dim1 + + 1], lda); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__5 = -nnb; + for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) { + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + sorm22_("Right", "No Transpose", &top, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &a[j * a_dim1 + 1], + lda, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + sgemm_("No Transpose", "No Transpose", &top, &i__6, & + i__4, &c_b15, &a[j * a_dim1 + 1], lda, &work[ + ppwo], &i__7, &c_b14, &work[pw], &top); + i__6 = nnb << 1; + slacpy_("All", &top, &i__6, &work[pw], &top, &a[j * + a_dim1 + 1], lda); + } + ppwo += (nnb << 2) * nnb; + } + + j = *ihi - nblst + 1; + sgemm_("No Transpose", "No Transpose", &top, &nblst, &nblst, & + c_b15, &b[j * b_dim1 + 1], ldb, &work[1], &nblst, & + c_b14, &work[pw], &top); + slacpy_("All", &top, &nblst, &work[pw], &top, &b[j * b_dim1 + + 1], ldb); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__5 = jcol + 1; + i__3 = -nnb; + for (j = j0; i__3 < 0 ? j >= i__5 : j <= i__5; j += i__3) { + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + sorm22_("Right", "No Transpose", &top, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &b[j * b_dim1 + 1], + ldb, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + sgemm_("No Transpose", "No Transpose", &top, &i__6, & + i__4, &c_b15, &b[j * b_dim1 + 1], ldb, &work[ + ppwo], &i__7, &c_b14, &work[pw], &top); + i__6 = nnb << 1; + slacpy_("All", &top, &i__6, &work[pw], &top, &b[j * + b_dim1 + 1], ldb); + } + ppwo += (nnb << 2) * nnb; + } + } + +/* Apply accumulated orthogonal matrices to Z. */ + + if (wantz) { + j = *ihi - nblst + 1; + if (initq) { +/* Computing MAX */ + i__3 = 2, i__5 = j - jcol + 1; + topq = f2cmax(i__3,i__5); + nh = *ihi - topq + 1; + } else { + topq = 1; + nh = *n; + } + sgemm_("No Transpose", "No Transpose", &nh, &nblst, &nblst, & + c_b15, &z__[topq + j * z_dim1], ldz, &work[1], &nblst, + &c_b14, &work[pw], &nh); + slacpy_("All", &nh, &nblst, &work[pw], &nh, &z__[topq + j * + z_dim1], ldz); + ppwo = nblst * nblst + 1; + j0 = j - nnb; + i__3 = jcol + 1; + i__5 = -nnb; + for (j = j0; i__5 < 0 ? j >= i__3 : j <= i__3; j += i__5) { + if (initq) { +/* Computing MAX */ + i__6 = 2, i__4 = j - jcol + 1; + topq = f2cmax(i__6,i__4); + nh = *ihi - topq + 1; + } + if (blk22) { + +/* Exploit the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = *lwork - pw + 1; + sorm22_("Right", "No Transpose", &nh, &i__6, &nnb, & + nnb, &work[ppwo], &i__4, &z__[topq + j * + z_dim1], ldz, &work[pw], &i__7, &ierr); + } else { + +/* Ignore the structure of U. */ + + i__6 = nnb << 1; + i__4 = nnb << 1; + i__7 = nnb << 1; + sgemm_("No Transpose", "No Transpose", &nh, &i__6, & + i__4, &c_b15, &z__[topq + j * z_dim1], ldz, & + work[ppwo], &i__7, &c_b14, &work[pw], &nh); + i__6 = nnb << 1; + slacpy_("All", &nh, &i__6, &work[pw], &nh, &z__[topq + + j * z_dim1], ldz); + } + ppwo += (nnb << 2) * nnb; + } + } + } + } + +/* Use unblocked code to reduce the rest of the matrix */ +/* Avoid re-initialization of modified Q and Z. */ + + *(unsigned char *)compq2 = *(unsigned char *)compq; + *(unsigned char *)compz2 = *(unsigned char *)compz; + if (jcol != *ilo) { + if (wantq) { + *(unsigned char *)compq2 = 'V'; + } + if (wantz) { + *(unsigned char *)compz2 = 'V'; + } + } + + if (jcol < *ihi) { + sgghrd_(compq2, compz2, n, &jcol, ihi, &a[a_offset], lda, &b[b_offset] + , ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &ierr); + } + work[1] = (real) lwkopt; + + return 0; + +/* End of SGGHD3 */ + +} /* sgghd3_ */ + diff --git a/lapack-netlib/SRC/sgghrd.c b/lapack-netlib/SRC/sgghrd.c new file mode 100644 index 000000000..6c705831f --- /dev/null +++ b/lapack-netlib/SRC/sgghrd.c @@ -0,0 +1,784 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGHRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGHRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, */ +/* LDQ, Z, LDZ, INFO ) */ + +/* CHARACTER COMPQ, COMPZ */ +/* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N */ +/* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGHRD reduces a pair of real matrices (A,B) to generalized upper */ +/* > Hessenberg form using orthogonal transformations, where A is a */ +/* > general matrix and B is upper triangular. The form of the */ +/* > generalized eigenvalue problem is */ +/* > A*x = lambda*B*x, */ +/* > and B is typically made upper triangular by computing its QR */ +/* > factorization and moving the orthogonal matrix Q to the left side */ +/* > of the equation. */ +/* > */ +/* > This subroutine simultaneously reduces A to a Hessenberg matrix H: */ +/* > Q**T*A*Z = H */ +/* > and transforms B to another upper triangular matrix T: */ +/* > Q**T*B*Z = T */ +/* > in order to reduce the problem to its standard form */ +/* > H*y = lambda*T*y */ +/* > where y = Z**T*x. */ +/* > */ +/* > The orthogonal matrices Q and Z are determined as products of Givens */ +/* > rotations. They may either be formed explicitly, or they may be */ +/* > postmultiplied into input matrices Q1 and Z1, so that */ +/* > */ +/* > Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T */ +/* > */ +/* > Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T */ +/* > */ +/* > If Q1 is the orthogonal matrix from the QR factorization of B in the */ +/* > original equation A*x = lambda*B*x, then SGGHRD reduces the original */ +/* > problem to generalized Hessenberg form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > = 'N': do not compute Q; */ +/* > = 'I': Q is initialized to the unit matrix, and the */ +/* > orthogonal matrix Q is returned; */ +/* > = 'V': Q must contain an orthogonal matrix Q1 on entry, */ +/* > and the product Q1*Q is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': do not compute Z; */ +/* > = 'I': Z is initialized to the unit matrix, and the */ +/* > orthogonal matrix Z is returned; */ +/* > = 'V': Z must contain an orthogonal matrix Z1 on entry, */ +/* > and the product Z1*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > ILO and IHI mark the rows and columns of A which are to be */ +/* > reduced. It is assumed that A is already upper triangular */ +/* > in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are */ +/* > normally set by a previous call to SGGBAL; otherwise they */ +/* > should be set to 1 and N respectively. */ +/* > 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA, N) */ +/* > On entry, the N-by-N general matrix to be reduced. */ +/* > On exit, the upper triangle and the first subdiagonal of A */ +/* > are overwritten with the upper Hessenberg matrix H, and the */ +/* > rest is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB, N) */ +/* > On entry, the N-by-N upper triangular matrix B. */ +/* > On exit, the upper triangular matrix T = Q**T B Z. The */ +/* > elements below the diagonal are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is REAL array, dimension (LDQ, N) */ +/* > On entry, if COMPQ = 'V', the orthogonal matrix Q1, */ +/* > typically from the QR factorization of B. */ +/* > On exit, if COMPQ='I', the orthogonal matrix Q, and if */ +/* > COMPQ = 'V', the product Q1*Q. */ +/* > Not referenced if COMPQ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. */ +/* > LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is REAL array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', the orthogonal matrix Z1. */ +/* > On exit, if COMPZ='I', the orthogonal matrix Z, and if */ +/* > COMPZ = 'V', the product Z1*Z. */ +/* > Not referenced if COMPZ='N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. */ +/* > LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This routine reduces A to Hessenberg and B to triangular form by */ +/* > an unblocked reduction, as described in _Matrix_Computations_, */ +/* > by Golub and Van Loan (Johns Hopkins Press.) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sgghrd_(char *compq, char *compz, integer *n, integer * + ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real + *q, integer *ldq, real *z__, integer *ldz, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, + z_offset, i__1, i__2, i__3; + + /* Local variables */ + integer jcol; + real temp; + integer jrow; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + real c__, s; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer icompq; + extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, + real *, real *, integer *), slartg_(real *, real *, real * + , real *, real *); + integer icompz; + logical ilq, ilz; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode COMPQ */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + + /* Function Body */ + if (lsame_(compq, "N")) { + ilq = FALSE_; + icompq = 1; + } else if (lsame_(compq, "V")) { + ilq = TRUE_; + icompq = 2; + } else if (lsame_(compq, "I")) { + ilq = TRUE_; + icompq = 3; + } else { + icompq = 0; + } + +/* Decode COMPZ */ + + if (lsame_(compz, "N")) { + ilz = FALSE_; + icompz = 1; + } else if (lsame_(compz, "V")) { + ilz = TRUE_; + icompz = 2; + } else if (lsame_(compz, "I")) { + ilz = TRUE_; + icompz = 3; + } else { + icompz = 0; + } + +/* Test the input parameters. */ + + *info = 0; + if (icompq <= 0) { + *info = -1; + } else if (icompz <= 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1) { + *info = -4; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -5; + } else if (*lda < f2cmax(1,*n)) { + *info = -7; + } else if (*ldb < f2cmax(1,*n)) { + *info = -9; + } else if (ilq && *ldq < *n || *ldq < 1) { + *info = -11; + } else if (ilz && *ldz < *n || *ldz < 1) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGHRD", &i__1, (ftnlen)6); + return 0; + } + +/* Initialize Q and Z if desired. */ + + if (icompq == 3) { + slaset_("Full", n, n, &c_b10, &c_b11, &q[q_offset], ldq); + } + if (icompz == 3) { + slaset_("Full", n, n, &c_b10, &c_b11, &z__[z_offset], ldz); + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* Zero out lower triangle of B */ + + i__1 = *n - 1; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = jcol + 1; jrow <= i__2; ++jrow) { + b[jrow + jcol * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + +/* Reduce A and B */ + + i__1 = *ihi - 2; + for (jcol = *ilo; jcol <= i__1; ++jcol) { + + i__2 = jcol + 2; + for (jrow = *ihi; jrow >= i__2; --jrow) { + +/* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ + + temp = a[jrow - 1 + jcol * a_dim1]; + slartg_(&temp, &a[jrow + jcol * a_dim1], &c__, &s, &a[jrow - 1 + + jcol * a_dim1]); + a[jrow + jcol * a_dim1] = 0.f; + i__3 = *n - jcol; + srot_(&i__3, &a[jrow - 1 + (jcol + 1) * a_dim1], lda, &a[jrow + ( + jcol + 1) * a_dim1], lda, &c__, &s); + i__3 = *n + 2 - jrow; + srot_(&i__3, &b[jrow - 1 + (jrow - 1) * b_dim1], ldb, &b[jrow + ( + jrow - 1) * b_dim1], ldb, &c__, &s); + if (ilq) { + srot_(n, &q[(jrow - 1) * q_dim1 + 1], &c__1, &q[jrow * q_dim1 + + 1], &c__1, &c__, &s); + } + +/* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ + + temp = b[jrow + jrow * b_dim1]; + slartg_(&temp, &b[jrow + (jrow - 1) * b_dim1], &c__, &s, &b[jrow + + jrow * b_dim1]); + b[jrow + (jrow - 1) * b_dim1] = 0.f; + srot_(ihi, &a[jrow * a_dim1 + 1], &c__1, &a[(jrow - 1) * a_dim1 + + 1], &c__1, &c__, &s); + i__3 = jrow - 1; + srot_(&i__3, &b[jrow * b_dim1 + 1], &c__1, &b[(jrow - 1) * b_dim1 + + 1], &c__1, &c__, &s); + if (ilz) { + srot_(n, &z__[jrow * z_dim1 + 1], &c__1, &z__[(jrow - 1) * + z_dim1 + 1], &c__1, &c__, &s); + } +/* L30: */ + } +/* L40: */ + } + + return 0; + +/* End of SGGHRD */ + +} /* sgghrd_ */ + diff --git a/lapack-netlib/SRC/sgglse.c b/lapack-netlib/SRC/sgglse.c new file mode 100644 index 000000000..bf637879d --- /dev/null +++ b/lapack-netlib/SRC/sgglse.c @@ -0,0 +1,786 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGLSE solves overdetermined or underdetermined systems for OTHER matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGLSE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* REAL A( LDA, * ), B( LDB, * ), C( * ), D( * ), */ +/* $ WORK( * ), X( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGLSE solves the linear equality-constrained least squares (LSE) */ +/* > problem: */ +/* > */ +/* > minimize || c - A*x ||_2 subject to B*x = d */ +/* > */ +/* > where A is an M-by-N matrix, B is a P-by-N matrix, c is a given */ +/* > M-vector, and d is a given P-vector. It is assumed that */ +/* > P <= N <= M+P, and */ +/* > */ +/* > rank(B) = P and rank( (A) ) = N. */ +/* > ( (B) ) */ +/* > */ +/* > These conditions ensure that the LSE problem has a unique solution, */ +/* > which is obtained using a generalized RQ factorization of the */ +/* > matrices (B, A) given by */ +/* > */ +/* > B = (0 R)*Q, A = Z*T*Q. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. 0 <= P <= N <= M+P. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(M,N)-by-N upper trapezoidal matrix T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, the upper triangle of the subarray B(1:P,N-P+1:N) */ +/* > contains the P-by-P upper triangular matrix R. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (M) */ +/* > On entry, C contains the right hand side vector for the */ +/* > least squares part of the LSE problem. */ +/* > On exit, the residual sum of squares for the solution */ +/* > is given by the sum of squares of elements N-P+1 to M of */ +/* > vector C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (P) */ +/* > On entry, D contains the right hand side vector for the */ +/* > constrained equation. */ +/* > On exit, D is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (N) */ +/* > On exit, X is the solution of the LSE problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,M+N+P). */ +/* > For optimum performance LWORK >= P+f2cmin(M,N)+f2cmax(M,N)*NB, */ +/* > where NB is an upper bound for the optimal blocksizes for */ +/* > SGEQRF, SGERQF, SORMQR and SORMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > = 1: the upper triangular factor R associated with B in the */ +/* > generalized RQ factorization of the pair (B, A) is */ +/* > singular, so that rank(B) < P; the least squares */ +/* > solution could not be computed. */ +/* > = 2: the (N-P) by (N-P) part of the upper trapezoidal factor */ +/* > T associated with A in the generalized RQ factorization */ +/* > of the pair (B, A) is singular, so that */ +/* > rank( (A) ) < N; the least squares solution could not */ +/* > ( (B) ) */ +/* > be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realOTHERsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgglse_(integer *m, integer *n, integer *p, real *a, + integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer lopt; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), + saxpy_(integer *, real *, real *, integer *, real *, integer *), + strmv_(char *, char *, char *, integer *, real *, integer *, real + *, integer *); + integer nb, mn, nr; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sggrqf_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, real *, real *, integer * + , integer *); + integer lwkmin, nb1, nb2, nb3, nb4, lwkopt; + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *), sormrq_(char *, char *, + integer *, integer *, integer *, real *, integer *, real *, real * + , integer *, real *, integer *, integer *), + strtrs_(char *, char *, char *, integer *, integer *, real *, + integer *, real *, integer *, integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --c__; + --d__; + --x; + --work; + + /* Function Body */ + *info = 0; + mn = f2cmin(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*p < 0 || *p > *n || *p < *n - *m) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*p)) { + *info = -7; + } + +/* Calculate workspace */ + + if (*info == 0) { + if (*n == 0) { + lwkmin = 1; + lwkopt = 1; + } else { + nb1 = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb2 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + nb3 = ilaenv_(&c__1, "SORMQR", " ", m, n, p, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb4 = ilaenv_(&c__1, "SORMRQ", " ", m, n, p, &c_n1, (ftnlen)6, ( + ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2), i__1 = f2cmax(i__1,nb3); + nb = f2cmax(i__1,nb4); + lwkmin = *m + *n + *p; + lwkopt = *p + mn + f2cmax(*m,*n) * nb; + } + work[1] = (real) lwkopt; + + if (*lwork < lwkmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGLSE", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Compute the GRQ factorization of matrices B and A: */ + +/* B*Q**T = ( 0 T12 ) P Z**T*A*Q**T = ( R11 R12 ) N-P */ +/* N-P P ( 0 R22 ) M+P-N */ +/* N-P P */ + +/* where T12 and R11 are upper triangular, and Q and Z are */ +/* orthogonal. */ + + i__1 = *lwork - *p - mn; + sggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p + + 1], &work[*p + mn + 1], &i__1, info); + lopt = work[*p + mn + 1]; + +/* Update c = Z**T *c = ( c1 ) N-P */ +/* ( c2 ) M+P-N */ + + i__1 = f2cmax(1,*m); + i__2 = *lwork - *p - mn; + sormqr_("Left", "Transpose", m, &c__1, &mn, &a[a_offset], lda, &work[*p + + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*p + mn + 1]; + lopt = f2cmax(i__1,i__2); + +/* Solve T12*x2 = d for x2 */ + + if (*p > 0) { + strtrs_("Upper", "No transpose", "Non-unit", p, &c__1, &b[(*n - *p + + 1) * b_dim1 + 1], ldb, &d__[1], p, info); + + if (*info > 0) { + *info = 1; + return 0; + } + +/* Put the solution in X */ + + scopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1); + +/* Update c1 */ + + i__1 = *n - *p; + sgemv_("No transpose", &i__1, p, &c_b31, &a[(*n - *p + 1) * a_dim1 + + 1], lda, &d__[1], &c__1, &c_b33, &c__[1], &c__1); + } + +/* Solve R11*x1 = c1 for x1 */ + + if (*n > *p) { + i__1 = *n - *p; + i__2 = *n - *p; + strtrs_("Upper", "No transpose", "Non-unit", &i__1, &c__1, &a[ + a_offset], lda, &c__[1], &i__2, info); + + if (*info > 0) { + *info = 2; + return 0; + } + +/* Put the solutions in X */ + + i__1 = *n - *p; + scopy_(&i__1, &c__[1], &c__1, &x[1], &c__1); + } + +/* Compute the residual vector: */ + + if (*m < *n) { + nr = *m + *p - *n; + if (nr > 0) { + i__1 = *n - *m; + sgemv_("No transpose", &nr, &i__1, &c_b31, &a[*n - *p + 1 + (*m + + 1) * a_dim1], lda, &d__[nr + 1], &c__1, &c_b33, &c__[*n - + *p + 1], &c__1); + } + } else { + nr = *p; + } + if (nr > 0) { + strmv_("Upper", "No transpose", "Non unit", &nr, &a[*n - *p + 1 + (*n + - *p + 1) * a_dim1], lda, &d__[1], &c__1); + saxpy_(&nr, &c_b31, &d__[1], &c__1, &c__[*n - *p + 1], &c__1); + } + +/* Backward transformation x = Q**T*x */ + + i__1 = *lwork - *p - mn; + sormrq_("Left", "Transpose", n, &c__1, p, &b[b_offset], ldb, &work[1], &x[ + 1], n, &work[*p + mn + 1], &i__1, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[*p + mn + 1]; + work[1] = (real) (*p + mn + f2cmax(i__1,i__2)); + + return 0; + +/* End of SGGLSE */ + +} /* sgglse_ */ + diff --git a/lapack-netlib/SRC/sggqrf.c b/lapack-netlib/SRC/sggqrf.c new file mode 100644 index 000000000..d922cf90f --- /dev/null +++ b/lapack-netlib/SRC/sggqrf.c @@ -0,0 +1,718 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGQRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGQRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, */ +/* LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGQRF computes a generalized QR factorization of an N-by-M matrix A */ +/* > and an N-by-P matrix B: */ +/* > */ +/* > A = Q*R, B = Q*T*Z, */ +/* > */ +/* > where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */ +/* > matrix, and R and T assume one of the forms: */ +/* > */ +/* > if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, */ +/* > ( 0 ) N-M N M-N */ +/* > M */ +/* > */ +/* > where R11 is upper triangular, and */ +/* > */ +/* > if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, */ +/* > P-N N ( T21 ) P */ +/* > P */ +/* > */ +/* > where T12 or T21 is upper triangular. */ +/* > */ +/* > In particular, if B is square and nonsingular, the GQR factorization */ +/* > of A and B implicitly gives the QR factorization of inv(B)*A: */ +/* > */ +/* > inv(B)*A = Z**T*(inv(T)*R) */ +/* > */ +/* > where inv(B) denotes the inverse of the matrix B, and Z**T denotes the */ +/* > transpose of the matrix Z. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of columns of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,M) */ +/* > On entry, the N-by-M matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(N,M)-by-M upper trapezoidal matrix R (R is */ +/* > upper triangular if N >= M); the elements below the diagonal, */ +/* > with the array TAUA, represent the orthogonal matrix Q as a */ +/* > product of f2cmin(N,M) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUA */ +/* > \verbatim */ +/* > TAUA is REAL array, dimension (f2cmin(N,M)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,P) */ +/* > On entry, the N-by-P matrix B. */ +/* > On exit, if N <= P, the upper triangle of the subarray */ +/* > B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */ +/* > if N > P, the elements on and above the (N-P)-th subdiagonal */ +/* > contain the N-by-P upper trapezoidal matrix T; the remaining */ +/* > elements, with the array TAUB, represent the orthogonal */ +/* > matrix Z as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUB */ +/* > \verbatim */ +/* > TAUB is REAL array, dimension (f2cmin(N,P)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Z (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */ +/* > For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */ +/* > where NB1 is the optimal blocksize for the QR factorization */ +/* > of an N-by-M matrix, NB2 is the optimal blocksize for the */ +/* > RQ factorization of an N-by-P matrix, and NB3 is the optimal */ +/* > blocksize for a call of SORMQR. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(n,m). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taua * v * v**T */ +/* > */ +/* > where taua is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ +/* > and taua in TAUA(i). */ +/* > To form Q explicitly, use LAPACK subroutine SORGQR. */ +/* > To use Q to update another matrix, use LAPACK subroutine SORMQR. */ +/* > */ +/* > The matrix Z is represented as a product of elementary reflectors */ +/* > */ +/* > Z = H(1) H(2) . . . H(k), where k = f2cmin(n,p). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taub * v * v**T */ +/* > */ +/* > where taub is a real scalar, and v is a real vector with */ +/* > v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in */ +/* > B(n-k+i,1:p-k+i-1), and taub in TAUB(i). */ +/* > To form Z explicitly, use LAPACK subroutine SORGRQ. */ +/* > To use Z to update another matrix, use LAPACK subroutine SORMRQ. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggqrf_(integer *n, integer *m, integer *p, real *a, + integer *lda, real *taua, real *b, integer *ldb, real *taub, real * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer lopt, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), sgerqf_(integer *, + integer *, real *, integer *, real *, real *, integer *, integer * + ); + integer nb1, nb2, nb3, lwkopt; + logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb2 = ilaenv_(&c__1, "SGERQF", " ", n, p, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb3 = ilaenv_(&c__1, "SORMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2); + nb = f2cmax(i__1,nb3); +/* Computing MAX */ + i__1 = f2cmax(*n,*m); + lwkopt = f2cmax(i__1,*p) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*p < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*n)) { + *info = -5; + } else if (*ldb < f2cmax(1,*n)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*n), i__1 = f2cmax(i__1,*m); + if (*lwork < f2cmax(i__1,*p) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGQRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* QR factorization of N-by-M matrix A: A = Q*R */ + + sgeqrf_(n, m, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = work[1]; + +/* Update B := Q**T*B. */ + + i__1 = f2cmin(*n,*m); + sormqr_("Left", "Transpose", n, p, &i__1, &a[a_offset], lda, &taua[1], &b[ + b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + lopt = f2cmax(i__1,i__2); + +/* RQ factorization of N-by-P matrix B: B = T*Z. */ + + sgerqf_(n, p, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + work[1] = (real) f2cmax(i__1,i__2); + + return 0; + +/* End of SGGQRF */ + +} /* sggqrf_ */ + diff --git a/lapack-netlib/SRC/sggrqf.c b/lapack-netlib/SRC/sggrqf.c new file mode 100644 index 000000000..5780cfdb1 --- /dev/null +++ b/lapack-netlib/SRC/sggrqf.c @@ -0,0 +1,719 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGRQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGRQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, */ +/* LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDB, LWORK, M, N, P */ +/* REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), */ +/* $ WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGRQF computes a generalized RQ factorization of an M-by-N matrix A */ +/* > and a P-by-N matrix B: */ +/* > */ +/* > A = R*Q, B = Z*T*Q, */ +/* > */ +/* > where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal */ +/* > matrix, and R and T assume one of the forms: */ +/* > */ +/* > if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, */ +/* > N-M M ( R21 ) N */ +/* > N */ +/* > */ +/* > where R12 or R21 is upper triangular, and */ +/* > */ +/* > if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, */ +/* > ( 0 ) P-N P N-P */ +/* > N */ +/* > */ +/* > where T11 is upper triangular. */ +/* > */ +/* > In particular, if B is square and nonsingular, the GRQ factorization */ +/* > of A and B implicitly gives the RQ factorization of A*inv(B): */ +/* > */ +/* > A*inv(B) = (R*inv(T))*Z**T */ +/* > */ +/* > where inv(B) denotes the inverse of the matrix B, and Z**T denotes the */ +/* > transpose of the matrix Z. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, if M <= N, the upper triangle of the subarray */ +/* > A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; */ +/* > if M > N, the elements on and above the (M-N)-th subdiagonal */ +/* > contain the M-by-N upper trapezoidal matrix R; the remaining */ +/* > elements, with the array TAUA, represent the orthogonal */ +/* > matrix Q as a product of elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUA */ +/* > \verbatim */ +/* > TAUA is REAL array, dimension (f2cmin(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the f2cmin(P,N)-by-N upper trapezoidal matrix T (T is */ +/* > upper triangular if P >= N); the elements below the diagonal, */ +/* > with the array TAUB, represent the orthogonal matrix Z as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUB */ +/* > \verbatim */ +/* > TAUB is REAL array, dimension (f2cmin(P,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Z (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N,M,P). */ +/* > For optimum performance LWORK >= f2cmax(N,M,P)*f2cmax(NB1,NB2,NB3), */ +/* > where NB1 is the optimal blocksize for the RQ factorization */ +/* > of an M-by-N matrix, NB2 is the optimal blocksize for the */ +/* > QR factorization of a P-by-N matrix, and NB3 is the optimal */ +/* > blocksize for a call of SORMRQ. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INF0= -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = f2cmin(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taua * v * v**T */ +/* > */ +/* > where taua is a real scalar, and v is a real vector with */ +/* > v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in */ +/* > A(m-k+i,1:n-k+i-1), and taua in TAUA(i). */ +/* > To form Q explicitly, use LAPACK subroutine SORGRQ. */ +/* > To use Q to update another matrix, use LAPACK subroutine SORMRQ. */ +/* > */ +/* > The matrix Z is represented as a product of elementary reflectors */ +/* > */ +/* > Z = H(1) H(2) . . . H(k), where k = f2cmin(p,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - taub * v * v**T */ +/* > */ +/* > where taub is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), */ +/* > and taub in TAUB(i). */ +/* > To form Z explicitly, use LAPACK subroutine SORGQR. */ +/* > To use Z to update another matrix, use LAPACK subroutine SORMQR. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggrqf_(integer *m, integer *p, integer *n, real *a, + integer *lda, real *taua, real *b, integer *ldb, real *taub, real * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer lopt, nb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), sgerqf_(integer *, + integer *, real *, integer *, real *, real *, integer *, integer * + ); + integer nb1, nb2, nb3, lwkopt; + logical lquery; + extern /* Subroutine */ int sormrq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --taua; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --taub; + --work; + + /* Function Body */ + *info = 0; + nb1 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb2 = ilaenv_(&c__1, "SGEQRF", " ", p, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + nb3 = ilaenv_(&c__1, "SORMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = f2cmax(nb1,nb2); + nb = f2cmax(i__1,nb3); +/* Computing MAX */ + i__1 = f2cmax(*n,*m); + lwkopt = f2cmax(i__1,*p) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*p < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < f2cmax(1,*m)) { + *info = -5; + } else if (*ldb < f2cmax(1,*p)) { + *info = -8; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = f2cmax(1,*m), i__1 = f2cmax(i__1,*p); + if (*lwork < f2cmax(i__1,*n) && ! lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGRQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* RQ factorization of M-by-N matrix A: A = R*Q */ + + sgerqf_(m, n, &a[a_offset], lda, &taua[1], &work[1], lwork, info); + lopt = work[1]; + +/* Update B := B*Q**T */ + + i__1 = f2cmin(*m,*n); +/* Computing MAX */ + i__2 = 1, i__3 = *m - *n + 1; + sormrq_("Right", "Transpose", p, n, &i__1, &a[f2cmax(i__2,i__3) + a_dim1], + lda, &taua[1], &b[b_offset], ldb, &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + lopt = f2cmax(i__1,i__2); + +/* QR factorization of P-by-N matrix B: B = Z*T */ + + sgeqrf_(p, n, &b[b_offset], ldb, &taub[1], &work[1], lwork, info); +/* Computing MAX */ + i__1 = lopt, i__2 = (integer) work[1]; + work[1] = (real) f2cmax(i__1,i__2); + + return 0; + +/* End of SGGRQF */ + +} /* sggrqf_ */ + diff --git a/lapack-netlib/SRC/sggsvd3.c b/lapack-netlib/SRC/sggsvd3.c new file mode 100644 index 000000000..081ba6fac --- /dev/null +++ b/lapack-netlib/SRC/sggsvd3.c @@ -0,0 +1,936 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGSVD3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, */ +/* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, */ +/* LWORK, IWORK, INFO ) */ + +/* CHARACTER JOBQ, JOBU, JOBV */ +/* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), */ +/* $ BETA( * ), Q( LDQ, * ), U( LDU, * ), */ +/* $ V( LDV, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGSVD3 computes the generalized singular value decomposition (GSVD) */ +/* > of an M-by-N real matrix A and P-by-N real matrix B: */ +/* > */ +/* > U**T*A*Q = D1*( 0 R ), V**T*B*Q = D2*( 0 R ) */ +/* > */ +/* > where U, V and Q are orthogonal matrices. */ +/* > Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T, */ +/* > then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and */ +/* > D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the */ +/* > following structures, respectively: */ +/* > */ +/* > If M-K-L >= 0, */ +/* > */ +/* > K L */ +/* > D1 = K ( I 0 ) */ +/* > L ( 0 C ) */ +/* > M-K-L ( 0 0 ) */ +/* > */ +/* > K L */ +/* > D2 = L ( 0 S ) */ +/* > P-L ( 0 0 ) */ +/* > */ +/* > N-K-L K L */ +/* > ( 0 R ) = K ( 0 R11 R12 ) */ +/* > L ( 0 0 R22 ) */ +/* > */ +/* > where */ +/* > */ +/* > C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ +/* > S = diag( BETA(K+1), ... , BETA(K+L) ), */ +/* > C**2 + S**2 = I. */ +/* > */ +/* > R is stored in A(1:K+L,N-K-L+1:N) on exit. */ +/* > */ +/* > If M-K-L < 0, */ +/* > */ +/* > K M-K K+L-M */ +/* > D1 = K ( I 0 0 ) */ +/* > M-K ( 0 C 0 ) */ +/* > */ +/* > K M-K K+L-M */ +/* > D2 = M-K ( 0 S 0 ) */ +/* > K+L-M ( 0 0 I ) */ +/* > P-L ( 0 0 0 ) */ +/* > */ +/* > N-K-L K M-K K+L-M */ +/* > ( 0 R ) = K ( 0 R11 R12 R13 ) */ +/* > M-K ( 0 0 R22 R23 ) */ +/* > K+L-M ( 0 0 0 R33 ) */ +/* > */ +/* > where */ +/* > */ +/* > C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ +/* > S = diag( BETA(K+1), ... , BETA(M) ), */ +/* > C**2 + S**2 = I. */ +/* > */ +/* > (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored */ +/* > ( 0 R22 R23 ) */ +/* > in B(M-K+1:L,N+M-K-L+1:N) on exit. */ +/* > */ +/* > The routine computes C, S, R, and optionally the orthogonal */ +/* > transformation matrices U, V and Q. */ +/* > */ +/* > In particular, if B is an N-by-N nonsingular matrix, then the GSVD of */ +/* > A and B implicitly gives the SVD of A*inv(B): */ +/* > A*inv(B) = U*(D1*inv(D2))*V**T. */ +/* > If ( A**T,B**T)**T has orthonormal columns, then the GSVD of A and B is */ +/* > also equal to the CS decomposition of A and B. Furthermore, the GSVD */ +/* > can be used to derive the solution of the eigenvalue problem: */ +/* > A**T*A x = lambda* B**T*B x. */ +/* > In some literature, the GSVD of A and B is presented in the form */ +/* > U**T*A*X = ( 0 D1 ), V**T*B*X = ( 0 D2 ) */ +/* > where U and V are orthogonal and X is nonsingular, D1 and D2 are */ +/* > ``diagonal''. The former GSVD form can be converted to the latter */ +/* > form by taking the nonsingular matrix X as */ +/* > */ +/* > X = Q*( I 0 ) */ +/* > ( 0 inv(R) ). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'U': Orthogonal matrix U is computed; */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'V': Orthogonal matrix V is computed; */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBQ */ +/* > \verbatim */ +/* > JOBQ is CHARACTER*1 */ +/* > = 'Q': Orthogonal matrix Q is computed; */ +/* > = 'N': Q is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] L */ +/* > \verbatim */ +/* > L is INTEGER */ +/* > */ +/* > On exit, K and L specify the dimension of the subblocks */ +/* > described in Purpose. */ +/* > K + L = effective numerical rank of (A**T,B**T)**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A contains the triangular matrix R, or part of R. */ +/* > See Purpose for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, B contains the triangular matrix R if M-K-L < 0. */ +/* > See Purpose for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > */ +/* > On exit, ALPHA and BETA contain the generalized singular */ +/* > value pairs of A and B; */ +/* > ALPHA(1:K) = 1, */ +/* > BETA(1:K) = 0, */ +/* > and if M-K-L >= 0, */ +/* > ALPHA(K+1:K+L) = C, */ +/* > BETA(K+1:K+L) = S, */ +/* > or if M-K-L < 0, */ +/* > ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 */ +/* > BETA(K+1:M) =S, BETA(M+1:K+L) =1 */ +/* > and */ +/* > ALPHA(K+L+1:N) = 0 */ +/* > BETA(K+L+1:N) = 0 */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is REAL array, dimension (LDU,M) */ +/* > If JOBU = 'U', U contains the M-by-M orthogonal matrix U. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= f2cmax(1,M) if */ +/* > JOBU = 'U'; LDU >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,P) */ +/* > If JOBV = 'V', V contains the P-by-P orthogonal matrix V. */ +/* > If JOBV = 'N', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,P) if */ +/* > JOBV = 'V'; LDV >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is REAL array, dimension (LDQ,N) */ +/* > If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. */ +/* > If JOBQ = 'N', Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */ +/* > JOBQ = 'Q'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > On exit, IWORK stores the sorting information. More */ +/* > precisely, the following loop will sort ALPHA */ +/* > for I = K+1, f2cmin(M,K+L) */ +/* > swap ALPHA(I) and ALPHA(IWORK(I)) */ +/* > endfor */ +/* > such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, the Jacobi-type procedure failed to */ +/* > converge. For further details, see subroutine STGSJA. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > TOLA REAL */ +/* > TOLB REAL */ +/* > TOLA and TOLB are the thresholds to determine the effective */ +/* > rank of (A**T,B**T)**T. Generally, they are set to */ +/* > TOLA = MAX(M,N)*norm(A)*MACHEPS, */ +/* > TOLB = MAX(P,N)*norm(B)*MACHEPS. */ +/* > The size of TOLA and TOLB may affect the size of backward */ +/* > errors of the decomposition. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date August 2015 */ + +/* > \ingroup realGEsing */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > SGGSVD3 replaces the deprecated subroutine SGGSVD. */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggsvd3_(char *jobu, char *jobv, char *jobq, integer *m, + integer *n, integer *p, integer *k, integer *l, real *a, integer *lda, + real *b, integer *ldb, real *alpha, real *beta, real *u, integer * + ldu, real *v, integer *ldv, real *q, integer *ldq, real *work, + integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2; + + /* Local variables */ + integer ibnd; + real tola; + integer isub; + real tolb, unfl, temp, smax; + integer ncallmycycle, i__, j; + extern logical lsame_(char *, char *); + real anorm, bnorm; + logical wantq; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical wantu, wantv; + extern real slamch_(char *), slange_(char *, integer *, integer *, + real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), stgsja_( + char *, char *, char *, integer *, integer *, integer *, integer * + , integer *, real *, integer *, real *, integer *, real *, real *, + real *, real *, real *, integer *, real *, integer *, real *, + integer *, real *, integer *, integer *); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int sggsvp3_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, integer *, real * + , real *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, integer *, real *, real *, integer * + , integer *); + real ulp; + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* August 2015 */ + + +/* ===================================================================== */ + + +/* Decode and test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + --alpha; + --beta; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --work; + --iwork; + + /* Function Body */ + wantu = lsame_(jobu, "U"); + wantv = lsame_(jobv, "V"); + wantq = lsame_(jobq, "Q"); + lquery = *lwork == -1; + lwkopt = 1; + +/* Test the input arguments */ + + *info = 0; + if (! (wantu || lsame_(jobu, "N"))) { + *info = -1; + } else if (! (wantv || lsame_(jobv, "N"))) { + *info = -2; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*p < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*m)) { + *info = -10; + } else if (*ldb < f2cmax(1,*p)) { + *info = -12; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } else if (*lwork < 1 && ! lquery) { + *info = -24; + } + +/* Compute workspace */ + + if (*info == 0) { + sggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], + ldb, &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, + &q[q_offset], ldq, &iwork[1], &work[1], &work[1], &c_n1, + info); + lwkopt = *n + (integer) work[1]; +/* Computing MAX */ + i__1 = *n << 1; + lwkopt = f2cmax(i__1,lwkopt); + lwkopt = f2cmax(1,lwkopt); + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGSVD3", &i__1, (ftnlen)7); + return 0; + } + if (lquery) { + return 0; + } + +/* Compute the Frobenius norm of matrices A and B */ + + anorm = slange_("1", m, n, &a[a_offset], lda, &work[1]); + bnorm = slange_("1", p, n, &b[b_offset], ldb, &work[1]); + +/* Get machine precision and set up threshold for determining */ +/* the effective numerical rank of the matrices A and B. */ + + ulp = slamch_("Precision"); + unfl = slamch_("Safe Minimum"); + tola = f2cmax(*m,*n) * f2cmax(anorm,unfl) * ulp; + tolb = f2cmax(*p,*n) * f2cmax(bnorm,unfl) * ulp; + +/* Preprocessing */ + + i__1 = *lwork - *n; + sggsvp3_(jobu, jobv, jobq, m, p, n, &a[a_offset], lda, &b[b_offset], ldb, + &tola, &tolb, k, l, &u[u_offset], ldu, &v[v_offset], ldv, &q[ + q_offset], ldq, &iwork[1], &work[1], &work[*n + 1], &i__1, info); + +/* Compute the GSVD of two upper "triangular" matrices */ + + stgsja_(jobu, jobv, jobq, m, p, n, k, l, &a[a_offset], lda, &b[b_offset], + ldb, &tola, &tolb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[ + v_offset], ldv, &q[q_offset], ldq, &work[1], &ncallmycycle, info); + +/* Sort the singular values and store the pivot indices in IWORK */ +/* Copy ALPHA to WORK, then sort ALPHA in WORK */ + + scopy_(n, &alpha[1], &c__1, &work[1], &c__1); +/* Computing MIN */ + i__1 = *l, i__2 = *m - *k; + ibnd = f2cmin(i__1,i__2); + i__1 = ibnd; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for largest ALPHA(K+I) */ + + isub = i__; + smax = work[*k + i__]; + i__2 = ibnd; + for (j = i__ + 1; j <= i__2; ++j) { + temp = work[*k + j]; + if (temp > smax) { + isub = j; + smax = temp; + } +/* L10: */ + } + if (isub != i__) { + work[*k + isub] = work[*k + i__]; + work[*k + i__] = smax; + iwork[*k + i__] = *k + isub; + } else { + iwork[*k + i__] = *k + i__; + } +/* L20: */ + } + + work[1] = (real) lwkopt; + return 0; + +/* End of SGGSVD3 */ + +} /* sggsvd3_ */ + diff --git a/lapack-netlib/SRC/sggsvp3.c b/lapack-netlib/SRC/sggsvp3.c new file mode 100644 index 000000000..99899fa8a --- /dev/null +++ b/lapack-netlib/SRC/sggsvp3.c @@ -0,0 +1,1056 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGGSVP3 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGGSVP3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, */ +/* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, */ +/* IWORK, TAU, WORK, LWORK, INFO ) */ + +/* CHARACTER JOBQ, JOBU, JOBV */ +/* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK */ +/* REAL TOLA, TOLB */ +/* INTEGER IWORK( * ) */ +/* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), */ +/* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGGSVP3 computes orthogonal matrices U, V and Q such that */ +/* > */ +/* > N-K-L K L */ +/* > U**T*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; */ +/* > L ( 0 0 A23 ) */ +/* > M-K-L ( 0 0 0 ) */ +/* > */ +/* > N-K-L K L */ +/* > = K ( 0 A12 A13 ) if M-K-L < 0; */ +/* > M-K ( 0 0 A23 ) */ +/* > */ +/* > N-K-L K L */ +/* > V**T*B*Q = L ( 0 0 B13 ) */ +/* > P-L ( 0 0 0 ) */ +/* > */ +/* > where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ +/* > upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ +/* > otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective */ +/* > numerical rank of the (M+P)-by-N matrix (A**T,B**T)**T. */ +/* > */ +/* > This decomposition is the preprocessing step for computing the */ +/* > Generalized Singular Value Decomposition (GSVD), see subroutine */ +/* > SGGSVD3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > = 'U': Orthogonal matrix U is computed; */ +/* > = 'N': U is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > = 'V': Orthogonal matrix V is computed; */ +/* > = 'N': V is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBQ */ +/* > \verbatim */ +/* > JOBQ is CHARACTER*1 */ +/* > = 'Q': Orthogonal matrix Q is computed; */ +/* > = 'N': Q is not computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] P */ +/* > \verbatim */ +/* > P is INTEGER */ +/* > The number of rows of the matrix B. P >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A contains the triangular (or trapezoidal) matrix */ +/* > described in the Purpose section. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,N) */ +/* > On entry, the P-by-N matrix B. */ +/* > On exit, B contains the triangular matrix described in */ +/* > the Purpose section. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,P). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOLA */ +/* > \verbatim */ +/* > TOLA is REAL */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOLB */ +/* > \verbatim */ +/* > TOLB is REAL */ +/* > */ +/* > TOLA and TOLB are the thresholds to determine the effective */ +/* > numerical rank of matrix B and a subblock of A. Generally, */ +/* > they are set to */ +/* > TOLA = MAX(M,N)*norm(A)*MACHEPS, */ +/* > TOLB = MAX(P,N)*norm(B)*MACHEPS. */ +/* > The size of TOLA and TOLB may affect the size of backward */ +/* > errors of the decomposition. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[out] L */ +/* > \verbatim */ +/* > L is INTEGER */ +/* > */ +/* > On exit, K and L specify the dimension of the subblocks */ +/* > described in Purpose section. */ +/* > K + L = effective numerical rank of (A**T,B**T)**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is REAL array, dimension (LDU,M) */ +/* > If JOBU = 'U', U contains the orthogonal matrix U. */ +/* > If JOBU = 'N', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= f2cmax(1,M) if */ +/* > JOBU = 'U'; LDU >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,P) */ +/* > If JOBV = 'V', V contains the orthogonal matrix V. */ +/* > If JOBV = 'N', V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. LDV >= f2cmax(1,P) if */ +/* > JOBV = 'V'; LDV >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is REAL array, dimension (LDQ,N) */ +/* > If JOBQ = 'Q', Q contains the orthogonal matrix Q. */ +/* > If JOBQ = 'N', Q is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= f2cmax(1,N) if */ +/* > JOBQ = 'Q'; LDQ >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date August 2015 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The subroutine uses LAPACK subroutine SGEQP3 for the QR factorization */ +/* > with column pivoting to detect the effective numerical rank of the */ +/* > a matrix. It may be replaced by a better rank determination strategy. */ +/* > */ +/* > SGGSVP3 replaces the deprecated subroutine SGGSVP. */ +/* > */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int sggsvp3_(char *jobu, char *jobv, char *jobq, integer *m, + integer *p, integer *n, real *a, integer *lda, real *b, integer *ldb, + real *tola, real *tolb, integer *k, integer *l, real *u, integer *ldu, + real *v, integer *ldv, real *q, integer *ldq, integer *iwork, real * + tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, + u_offset, v_dim1, v_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *); + logical wantq, wantu, wantv; + extern /* Subroutine */ int sgeqp3_(integer *, integer *, real *, integer + *, integer *, real *, real *, integer *, integer *), sgeqr2_( + integer *, integer *, real *, integer *, real *, real *, integer * + ), sgerq2_(integer *, integer *, real *, integer *, real *, real * + , integer *), sorg2r_(integer *, integer *, integer *, real *, + integer *, real *, real *, integer *), sorm2r_(char *, char *, + integer *, integer *, integer *, real *, integer *, real *, real * + , integer *, real *, integer *), sormr2_(char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , real *, integer *, real *, integer *), xerbla_( + char *, integer *, ftnlen), slacpy_(char *, integer *, integer *, + real *, integer *, real *, integer *), slaset_(char *, + integer *, integer *, real *, real *, real *, integer *), + slapmt_(logical *, integer *, integer *, real *, integer *, + integer *); + logical forwrd; + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* August 2015 */ + + + +/* ===================================================================== */ + + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1 * 1; + u -= u_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + --iwork; + --tau; + --work; + + /* Function Body */ + wantu = lsame_(jobu, "U"); + wantv = lsame_(jobv, "V"); + wantq = lsame_(jobq, "Q"); + forwrd = TRUE_; + lquery = *lwork == -1; + lwkopt = 1; + +/* Test the input arguments */ + + *info = 0; + if (! (wantu || lsame_(jobu, "N"))) { + *info = -1; + } else if (! (wantv || lsame_(jobv, "N"))) { + *info = -2; + } else if (! (wantq || lsame_(jobq, "N"))) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*p < 0) { + *info = -5; + } else if (*n < 0) { + *info = -6; + } else if (*lda < f2cmax(1,*m)) { + *info = -8; + } else if (*ldb < f2cmax(1,*p)) { + *info = -10; + } else if (*ldu < 1 || wantu && *ldu < *m) { + *info = -16; + } else if (*ldv < 1 || wantv && *ldv < *p) { + *info = -18; + } else if (*ldq < 1 || wantq && *ldq < *n) { + *info = -20; + } else if (*lwork < 1 && ! lquery) { + *info = -24; + } + +/* Compute workspace */ + + if (*info == 0) { + sgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], &c_n1, + info); + lwkopt = (integer) work[1]; + if (wantv) { + lwkopt = f2cmax(lwkopt,*p); + } +/* Computing MAX */ + i__1 = lwkopt, i__2 = f2cmin(*n,*p); + lwkopt = f2cmax(i__1,i__2); + lwkopt = f2cmax(lwkopt,*m); + if (wantq) { + lwkopt = f2cmax(lwkopt,*n); + } + sgeqp3_(m, n, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], &c_n1, + info); +/* Computing MAX */ + i__1 = lwkopt, i__2 = (integer) work[1]; + lwkopt = f2cmax(i__1,i__2); + lwkopt = f2cmax(1,lwkopt); + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGGSVP3", &i__1, (ftnlen)7); + return 0; + } + if (lquery) { + return 0; + } + +/* QR with column pivoting of B: B*P = V*( S11 S12 ) */ +/* ( 0 0 ) */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L10: */ + } + sgeqp3_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], lwork, + info); + +/* Update A := A*P */ + + slapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]); + +/* Determine the effective rank of matrix B. */ + + *l = 0; + i__1 = f2cmin(*p,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = b[i__ + i__ * b_dim1], abs(r__1)) > *tolb) { + ++(*l); + } +/* L20: */ + } + + if (wantv) { + +/* Copy the details of V, and form V. */ + + slaset_("Full", p, p, &c_b14, &c_b14, &v[v_offset], ldv); + if (*p > 1) { + i__1 = *p - 1; + slacpy_("Lower", &i__1, n, &b[b_dim1 + 2], ldb, &v[v_dim1 + 2], + ldv); + } + i__1 = f2cmin(*p,*n); + sorg2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info); + } + +/* Clean up B */ + + i__1 = *l - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + if (*p > *l) { + i__1 = *p - *l; + slaset_("Full", &i__1, n, &c_b14, &c_b14, &b[*l + 1 + b_dim1], ldb); + } + + if (wantq) { + +/* Set Q = I and Update Q := Q*P */ + + slaset_("Full", n, n, &c_b14, &c_b24, &q[q_offset], ldq); + slapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]); + } + + if (*p >= *l && *n != *l) { + +/* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z */ + + sgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info); + +/* Update A := A*Z**T */ + + sormr2_("Right", "Transpose", m, n, l, &b[b_offset], ldb, &tau[1], &a[ + a_offset], lda, &work[1], info); + + if (wantq) { + +/* Update Q := Q*Z**T */ + + sormr2_("Right", "Transpose", n, n, l, &b[b_offset], ldb, &tau[1], + &q[q_offset], ldq, &work[1], info); + } + +/* Clean up B */ + + i__1 = *n - *l; + slaset_("Full", l, &i__1, &c_b14, &c_b14, &b[b_offset], ldb); + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *l; + for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + + } + +/* Let N-L L */ +/* A = ( A11 A12 ) M, */ + +/* then the following does the complete QR decomposition of A11: */ + +/* A11 = U*( 0 T12 )*P1**T */ +/* ( 0 0 ) */ + + i__1 = *n - *l; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = 0; +/* L70: */ + } + i__1 = *n - *l; + sgeqp3_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], lwork, + info); + +/* Determine the effective rank of A11 */ + + *k = 0; +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = a[i__ + i__ * a_dim1], abs(r__1)) > *tola) { + ++(*k); + } +/* L80: */ + } + +/* Update A12 := U**T*A12, where A12 = A( 1:M, N-L+1:N ) */ + +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + sorm2r_("Left", "Transpose", m, l, &i__1, &a[a_offset], lda, &tau[1], &a[( + *n - *l + 1) * a_dim1 + 1], lda, &work[1], info); + + if (wantu) { + +/* Copy the details of U, and form U */ + + slaset_("Full", m, m, &c_b14, &c_b14, &u[u_offset], ldu); + if (*m > 1) { + i__1 = *m - 1; + i__2 = *n - *l; + slacpy_("Lower", &i__1, &i__2, &a[a_dim1 + 2], lda, &u[u_dim1 + 2] + , ldu); + } +/* Computing MIN */ + i__2 = *m, i__3 = *n - *l; + i__1 = f2cmin(i__2,i__3); + sorg2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info); + } + + if (wantq) { + +/* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */ + + i__1 = *n - *l; + slapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]); + } + +/* Clean up A: set the strictly lower triangular part of */ +/* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */ + + i__1 = *k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L90: */ + } +/* L100: */ + } + if (*m > *k) { + i__1 = *m - *k; + i__2 = *n - *l; + slaset_("Full", &i__1, &i__2, &c_b14, &c_b14, &a[*k + 1 + a_dim1], + lda); + } + + if (*n - *l > *k) { + +/* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */ + + i__1 = *n - *l; + sgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info); + + if (wantq) { + +/* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**T */ + + i__1 = *n - *l; + sormr2_("Right", "Transpose", n, &i__1, k, &a[a_offset], lda, & + tau[1], &q[q_offset], ldq, &work[1], info); + } + +/* Clean up A */ + + i__1 = *n - *l - *k; + slaset_("Full", k, &i__1, &c_b14, &c_b14, &a[a_offset], lda); + i__1 = *n - *l; + for (j = *n - *l - *k + 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L110: */ + } +/* L120: */ + } + + } + + if (*m > *k) { + +/* QR factorization of A( K+1:M,N-L+1:N ) */ + + i__1 = *m - *k; + sgeqr2_(&i__1, l, &a[*k + 1 + (*n - *l + 1) * a_dim1], lda, &tau[1], & + work[1], info); + + if (wantu) { + +/* Update U(:,K+1:M) := U(:,K+1:M)*U1 */ + + i__1 = *m - *k; +/* Computing MIN */ + i__3 = *m - *k; + i__2 = f2cmin(i__3,*l); + sorm2r_("Right", "No transpose", m, &i__1, &i__2, &a[*k + 1 + (*n + - *l + 1) * a_dim1], lda, &tau[1], &u[(*k + 1) * u_dim1 + + 1], ldu, &work[1], info); + } + +/* Clean up */ + + i__1 = *n; + for (j = *n - *l + 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L130: */ + } +/* L140: */ + } + + } + + work[1] = (real) lwkopt; + return 0; + +/* End of SGGSVP3 */ + +} /* sggsvp3_ */ + diff --git a/lapack-netlib/SRC/sgsvj0.c b/lapack-netlib/SRC/sgsvj0.c new file mode 100644 index 000000000..0a3bf3872 --- /dev/null +++ b/lapack-netlib/SRC/sgsvj0.c @@ -0,0 +1,1592 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGSVJ0 pre-processor for the routine sgesvj. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGSVJ0 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, */ +/* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */ + +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP */ +/* REAL EPS, SFMIN, TOL */ +/* CHARACTER*1 JOBV */ +/* REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGSVJ0 is called from SGESVJ as a pre-processor and that is its main */ +/* > purpose. It applies Jacobi rotations in the same way as SGESVJ does, but */ +/* > it does not check convergence (stopping criterion). Few tuning */ +/* > parameters (marked by [TP]) are available for the implementer. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether the output from this procedure is used */ +/* > to compute the matrix V: */ +/* > = 'V': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the N-by-N array V. */ +/* > (See the description of V.) */ +/* > = 'A': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the MV-by-N array V. */ +/* > (See the descriptions of MV and V.) */ +/* > = 'N': the Jacobi rotations are not accumulated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* > the input matrix. */ +/* > On exit, */ +/* > A_onexit * D_onexit represents the input matrix A*diag(D) */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of D, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The array D accumulates the scaling factors from the fast scaled */ +/* > Jacobi rotations. */ +/* > On entry, A*diag(D) represents the input matrix. */ +/* > On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of A, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SVA */ +/* > \verbatim */ +/* > SVA is REAL array, dimension (N) */ +/* > On entry, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A*diag(D). */ +/* > On exit, SVA contains the Euclidean norms of the columns of */ +/* > the matrix onexit*diag(D_onexit). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then MV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,N) */ +/* > If JOBV = 'V' then N rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'A' then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', LDV >= N. */ +/* > If JOBV = 'A', LDV >= MV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EPS */ +/* > \verbatim */ +/* > EPS is REAL */ +/* > EPS = SLAMCH('Epsilon') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SFMIN */ +/* > \verbatim */ +/* > SFMIN is REAL */ +/* > SFMIN = SLAMCH('Safe Minimum') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOL */ +/* > \verbatim */ +/* > TOL is REAL */ +/* > TOL is the threshold for Jacobi rotations. For a pair */ +/* > A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* > applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NSWEEP */ +/* > \verbatim */ +/* > NSWEEP is INTEGER */ +/* > NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* > performed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > LWORK is the dimension of WORK. LWORK >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > SGSVJ0 is used just to enable SGESVJ to call a simplified version of */ +/* > itself to work on a submatrix of the original matrix. */ +/* > */ +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ +/* > */ +/* > \par Bugs, Examples and Comments: */ +/* ================================= */ +/* > */ +/* > Please report all bugs and send interesting test examples and comments to */ +/* > drmac@math.hr. Thank you. */ + +/* ===================================================================== */ +/* Subroutine */ int sgsvj0_(char *jobv, integer *m, integer *n, real *a, + integer *lda, real *d__, real *sva, integer *mv, real *v, integer * + ldv, real *eps, real *sfmin, real *tol, integer *nsweep, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + real r__1, r__2; + + /* Local variables */ + real aapp, aapq, aaqq; + integer ierr; + real bigtheta; + extern real sdot_(integer *, real *, integer *, real *, integer *); + integer pskipped; + real aapp0, temp1; + extern real snrm2_(integer *, real *, integer *); + integer i__, p, q; + real t, apoaq, aqoap; + extern logical lsame_(char *, char *); + real theta, small, fastr[5]; + logical applv, rsvec; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical rotok; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), saxpy_(integer *, real *, real *, integer *, real *, + integer *), srotm_(integer *, real *, integer *, real *, integer * + , real *); + real rootsfmin, cs, sn; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + integer blskip; + real mxaapq, thsign; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + real mxsinj; + integer ir1, emptsw, notrot, iswrot, jbc; + real big; + integer kbl, lkahead, igl, ibr, jgl, nbl, mvl; + real rootbig, rooteps; + integer rowskip; + real roottol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + applv = lsame_(jobv, "A"); + rsvec = lsame_(jobv, "V"); + if (! (rsvec || applv || lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*lda < *m) { + *info = -5; + } else if ((rsvec || applv) && *mv < 0) { + *info = -8; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -10; + } else if (*tol <= *eps) { + *info = -13; + } else if (*nsweep < 0) { + *info = -14; + } else if (*lwork < *m) { + *info = -16; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGSVJ0", &i__1, (ftnlen)6); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1.f / *sfmin; + rootbig = 1.f / rootsfmin; + bigtheta = 1.f / rooteps; + roottol = sqrt(*tol); + + + emptsw = *n * (*n - 1) / 2; + notrot = 0; + fastr[0] = 0.f; + + + swband = 0; +/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ +/* if SGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure */ +/* ...... */ + kbl = f2cmin(8,*n); +/* [TP] KBL is a tuning parameter that defines the tile size in the */ +/* tiling of the p-q loops of pivot pairs. In general, an optimal */ +/* value of KBL depends on the matrix dimensions and on the */ +/* parameters of the computer's memory. */ + + nbl = *n / kbl; + if (nbl * kbl != *n) { + ++nbl; + } +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1 + 1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + lkahead = 1; +/* [TP] LKAHEAD is a tuning parameter. */ + swband = 0; + pskipped = 0; + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { + + mxaapq = 0.f; + mxsinj = 0.f; + iswrot = 0; + + notrot = 0; + pskipped = 0; + + i__2 = nbl; + for (ibr = 1; ibr <= i__2; ++ibr) { + igl = (ibr - 1) * kbl + 1; + +/* Computing MIN */ + i__4 = lkahead, i__5 = nbl - ibr; + i__3 = f2cmin(i__4,i__5); + for (ir1 = 0; ir1 <= i__3; ++ir1) { + + igl += ir1 * kbl; + +/* Computing MIN */ + i__5 = igl + kbl - 1, i__6 = *n - 1; + i__4 = f2cmin(i__5,i__6); + for (p = igl; p <= i__4; ++p) { + i__5 = *n - p + 1; + q = isamax_(&i__5, &sva[p], &c__1) + p - 1; + if (p != q) { + sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + + 1], &c__1); + if (rsvec) { + sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1); + } + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = d__[p]; + d__[p] = d__[q]; + d__[q] = temp1; + } + + if (ir1 == 0) { + +/* Column norms are periodically updated by explicit */ +/* norm computation. */ +/* Caveat: */ +/* Some BLAS implementations compute SNRM2(M,A(1,p),1) */ +/* as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may result in */ +/* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and */ +/* undeflow for ||A(:,p)||_2 < SQRT(underflow_threshold). */ +/* Hence, SNRM2 cannot be trusted, not even in the case when */ +/* the true norm is far from the under(over)flow boundaries. */ +/* If properly implemented SNRM2 is available, the IF-THEN-ELSE */ +/* below should read "AAPP = SNRM2( M, A(1,p), 1 ) * D(p)". */ + + if (sva[p] < rootbig && sva[p] > rootsfmin) { + sva[p] = snrm2_(m, &a[p * a_dim1 + 1], &c__1) * + d__[p]; + } else { + temp1 = 0.f; + aapp = 1.f; + slassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, & + aapp); + sva[p] = temp1 * sqrt(aapp) * d__[p]; + } + aapp = sva[p]; + } else { + aapp = sva[p]; + } + + if (aapp > 0.f) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = igl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = p + 1; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.f) { + + aapp0 = aapp; + if (aaqq >= 1.f) { + rotok = small * aapp <= aaqq; + if (aapp < big / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + scopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, & + d__[p], m, &c__1, &work[1], + lda, &ierr); + aapq = sdot_(m, &work[1], &c__1, &a[q + * a_dim1 + 1], &c__1) * d__[q] + / aaqq; + } + } else { + rotok = aapp <= aaqq / small; + if (aapp > small / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + scopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + slascl_("G", &c__0, &c__0, &aaqq, & + d__[q], m, &c__1, &work[1], + lda, &ierr); + aapq = sdot_(m, &work[1], &c__1, &a[p + * a_dim1 + 1], &c__1) * d__[p] + / aapp; + } + } + +/* Computing MAX */ + r__1 = mxaapq, r__2 = abs(aapq); + mxaapq = f2cmax(r__1,r__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > *tol) { + +/* ROTATED = ROTATED + ONE */ + + if (ir1 == 0) { + notrot = 0; + pskipped = 0; + ++iswrot; + } + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq; + + if (abs(theta) > bigtheta) { + + t = .5f / theta; + fastr[2] = t * d__[p] / d__[q]; + fastr[3] = -t * d__[q] / d__[p]; + srotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + + } else { + + + thsign = -r_sign(&c_b42, &aapq); + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; + +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); + + apoaq = d__[p] / d__[q]; + aqoap = d__[q] / d__[p]; + if (d__[p] >= 1.f) { + if (d__[q] >= 1.f) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + d__[p] *= cs; + d__[q] *= cs; + srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + } + } else { + if (d__[q] >= 1.f) { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + } else { + if (d__[p] >= d__[q]) { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + scopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, & + c_b42, m, &c__1, &work[1], + lda, &ierr); + slascl_("G", &c__0, &c__0, &aaqq, & + c_b42, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[p] / d__[q]; + saxpy_(m, &temp1, &work[1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + slascl_("G", &c__0, &c__0, &c_b42, & + aaqq, m, &c__1, &a[q * a_dim1 + + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * aapq; + sva[q] = aaqq * sqrt((f2cmax(r__1,r__2))) + ; + mxsinj = f2cmax(mxsinj,*sfmin); + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q), SVA(p) */ +/* recompute SVA(q), SVA(p). */ +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = snrm2_(m, &a[q * a_dim1 + + 1], &c__1) * d__[q]; + } else { + t = 0.f; + aaqq = 1.f; + slassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * d__[q]; + } + } + if (aapp / aapp0 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = snrm2_(m, &a[p * a_dim1 + + 1], &c__1) * d__[p]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * d__[p]; + } + sva[p] = aapp; + } + + } else { +/* A(:,p) and A(:,q) already numerically orthogonal */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + } else { +/* A(:,q) is zero column */ + if (ir1 == 0) { + ++notrot; + } + ++pskipped; + } + + if (i__ <= swband && pskipped > rowskip) { + if (ir1 == 0) { + aapp = -aapp; + } + notrot = 0; + goto L2103; + } + +/* L2002: */ + } +/* END q-LOOP */ + +L2103: +/* bailed out of q-loop */ + sva[p] = aapp; + } else { + sva[p] = aapp; + if (ir1 == 0 && aapp == 0.f) { +/* Computing MIN */ + i__5 = igl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - p; + } + } + +/* L2001: */ + } +/* end of the p-loop */ +/* end of doing the block ( ibr, ibr ) */ +/* L1002: */ + } +/* end of ir1-loop */ + +/* ........................................................ */ +/* ... go to the off diagonal blocks */ + + igl = (ibr - 1) * kbl + 1; + + i__3 = nbl; + for (jbc = ibr + 1; jbc <= i__3; ++jbc) { + + jgl = (jbc - 1) * kbl + 1; + +/* doing the block at ( ibr, jbc ) */ + + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n); + for (p = igl; p <= i__4; ++p) { + + aapp = sva[p]; + + if (aapp > 0.f) { + + pskipped = 0; + +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + + if (aaqq > 0.f) { + aapp0 = aapp; + + + + if (aaqq >= 1.f) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + scopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, & + d__[p], m, &c__1, &work[1], + lda, &ierr); + aapq = sdot_(m, &work[1], &c__1, &a[q + * a_dim1 + 1], &c__1) * d__[q] + / aaqq; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + scopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + slascl_("G", &c__0, &c__0, &aaqq, & + d__[q], m, &c__1, &work[1], + lda, &ierr); + aapq = sdot_(m, &work[1], &c__1, &a[p + * a_dim1 + 1], &c__1) * d__[p] + / aapp; + } + } + +/* Computing MAX */ + r__1 = mxaapq, r__2 = abs(aapq); + mxaapq = f2cmax(r__1,r__2); + +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > *tol) { + notrot = 0; +/* ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq; + if (aaqq > aapp0) { + theta = -theta; + } + + if (abs(theta) > bigtheta) { + t = .5f / theta; + fastr[2] = t * d__[p] / d__[q]; + fastr[3] = -t * d__[q] / d__[p]; + srotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + } else { + + + thsign = -r_sign(&c_b42, &aapq); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); + + apoaq = d__[p] / d__[q]; + aqoap = d__[q] / d__[p]; + if (d__[p] >= 1.f) { + + if (d__[q] >= 1.f) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + d__[p] *= cs; + d__[q] *= cs; + srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + d__[p] *= cs; + d__[q] /= cs; + } + } else { + if (d__[q] >= 1.f) { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + d__[p] /= cs; + d__[q] *= cs; + } else { + if (d__[p] >= d__[q]) { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + + } else { + if (aapp > aaqq) { + scopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, + &c_b42, m, &c__1, &work[1] + , lda, &ierr); + slascl_("G", &c__0, &c__0, &aaqq, + &c_b42, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[p] / d__[q]; + saxpy_(m, &temp1, &work[1], &c__1, + &a[q * a_dim1 + 1], & + c__1); + slascl_("G", &c__0, &c__0, &c_b42, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * + aapq; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } else { + scopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + slascl_("G", &c__0, &c__0, &aaqq, + &c_b42, m, &c__1, &work[1] + , lda, &ierr); + slascl_("G", &c__0, &c__0, &aapp, + &c_b42, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[q] / d__[p]; + saxpy_(m, &temp1, &work[1], &c__1, + &a[p * a_dim1 + 1], & + c__1); + slascl_("G", &c__0, &c__0, &c_b42, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * + aapq; + sva[p] = aapp * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q) */ +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = snrm2_(m, &a[q * a_dim1 + + 1], &c__1) * d__[q]; + } else { + t = 0.f; + aaqq = 1.f; + slassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * d__[q]; + } + } +/* Computing 2nd power */ + r__1 = aapp / aapp0; + if (r__1 * r__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = snrm2_(m, &a[p * a_dim1 + + 1], &c__1) * d__[p]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * d__[p]; + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } + + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + + sva[p] = aapp; + + } else { + if (aapp == 0.f) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - jgl + 1; + } + if (aapp < 0.f) { + notrot = 0; + } + } +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (r__1 = sva[p], abs(r__1)); +/* L2012: */ + } + +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = snrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp) * d__[*n]; + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + + if (i__ > swband + 1 && mxaapq < (real) (*n) * *tol && (real) (*n) * + mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } +/* L1993: */ + } +/* end i=1:NSWEEP loop */ +/* #:) Reaching this point means that the procedure has completed the given */ +/* number of iterations. */ + *info = *nsweep - 1; + goto L1995; +L1994: +/* #:) Reaching this point means that during the i-th sweep all pivots were */ +/* below the given tolerance, causing early exit. */ + + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the vector D. */ + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = isamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = d__[p]; + d__[p] = d__[q]; + d__[q] = temp1; + sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + return 0; +} /* sgsvj0_ */ + diff --git a/lapack-netlib/SRC/sgsvj1.c b/lapack-netlib/SRC/sgsvj1.c new file mode 100644 index 000000000..0516e5b31 --- /dev/null +++ b/lapack-netlib/SRC/sgsvj1.c @@ -0,0 +1,1234 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGSVJ1 pre-processor for the routine sgesvj, applies Jacobi rotations targeting only particular + pivots. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGSVJ1 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, */ +/* EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) */ + +/* REAL EPS, SFMIN, TOL */ +/* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP */ +/* CHARACTER*1 JOBV */ +/* REAL A( LDA, * ), D( N ), SVA( N ), V( LDV, * ), */ +/* $ WORK( LWORK ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGSVJ1 is called from SGESVJ as a pre-processor and that is its main */ +/* > purpose. It applies Jacobi rotations in the same way as SGESVJ does, but */ +/* > it targets only particular pivots and it does not check convergence */ +/* > (stopping criterion). Few tunning parameters (marked by [TP]) are */ +/* > available for the implementer. */ +/* > */ +/* > Further Details */ +/* > ~~~~~~~~~~~~~~~ */ +/* > SGSVJ1 applies few sweeps of Jacobi rotations in the column space of */ +/* > the input M-by-N matrix A. The pivot pairs are taken from the (1,2) */ +/* > off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The */ +/* > block-entries (tiles) of the (1,2) off-diagonal block are marked by the */ +/* > [x]'s in the following scheme: */ +/* > */ +/* > | * * * [x] [x] [x]| */ +/* > | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* > | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ +/* > |[x] [x] [x] * * * | */ +/* > |[x] [x] [x] * * * | */ +/* > |[x] [x] [x] * * * | */ +/* > */ +/* > In terms of the columns of A, the first N1 columns are rotated 'against' */ +/* > the remaining N-N1 columns, trying to increase the angle between the */ +/* > corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is */ +/* > tiled using quadratic tiles of side KBL. Here, KBL is a tunning parameter. */ +/* > The number of sweeps is given in NSWEEP and the orthogonality threshold */ +/* > is given in TOL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBV */ +/* > \verbatim */ +/* > JOBV is CHARACTER*1 */ +/* > Specifies whether the output from this procedure is used */ +/* > to compute the matrix V: */ +/* > = 'V': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the N-by-N array V. */ +/* > (See the description of V.) */ +/* > = 'A': the product of the Jacobi rotations is accumulated */ +/* > by postmulyiplying the MV-by-N array V. */ +/* > (See the descriptions of MV and V.) */ +/* > = 'N': the Jacobi rotations are not accumulated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. */ +/* > M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N1 */ +/* > \verbatim */ +/* > N1 is INTEGER */ +/* > N1 specifies the 2 x 2 block partition, the first N1 columns are */ +/* > rotated 'against' the remaining N-N1 columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, M-by-N matrix A, such that A*diag(D) represents */ +/* > the input matrix. */ +/* > On exit, */ +/* > A_onexit * D_onexit represents the input matrix A*diag(D) */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of N1, D, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The array D accumulates the scaling factors from the fast scaled */ +/* > Jacobi rotations. */ +/* > On entry, A*diag(D) represents the input matrix. */ +/* > On exit, A_onexit*diag(D_onexit) represents the input matrix */ +/* > post-multiplied by a sequence of Jacobi rotations, where the */ +/* > rotation threshold and the total number of sweeps are given in */ +/* > TOL and NSWEEP, respectively. */ +/* > (See the descriptions of N1, A, TOL and NSWEEP.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SVA */ +/* > \verbatim */ +/* > SVA is REAL array, dimension (N) */ +/* > On entry, SVA contains the Euclidean norms of the columns of */ +/* > the matrix A*diag(D). */ +/* > On exit, SVA contains the Euclidean norms of the columns of */ +/* > the matrix onexit*diag(D_onexit). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MV */ +/* > \verbatim */ +/* > MV is INTEGER */ +/* > If JOBV = 'A', then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then MV is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] V */ +/* > \verbatim */ +/* > V is REAL array, dimension (LDV,N) */ +/* > If JOBV = 'V' then N rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'A' then MV rows of V are post-multipled by a */ +/* > sequence of Jacobi rotations. */ +/* > If JOBV = 'N', then V is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V, LDV >= 1. */ +/* > If JOBV = 'V', LDV >= N. */ +/* > If JOBV = 'A', LDV >= MV. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EPS */ +/* > \verbatim */ +/* > EPS is REAL */ +/* > EPS = SLAMCH('Epsilon') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SFMIN */ +/* > \verbatim */ +/* > SFMIN is REAL */ +/* > SFMIN = SLAMCH('Safe Minimum') */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TOL */ +/* > \verbatim */ +/* > TOL is REAL */ +/* > TOL is the threshold for Jacobi rotations. For a pair */ +/* > A(:,p), A(:,q) of pivot columns, the Jacobi rotation is */ +/* > applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NSWEEP */ +/* > \verbatim */ +/* > NSWEEP is INTEGER */ +/* > NSWEEP is the number of sweeps of Jacobi rotations to be */ +/* > performed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > LWORK is the dimension of WORK. LWORK >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, then the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date November 2017 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */ + +/* ===================================================================== */ +/* Subroutine */ int sgsvj1_(char *jobv, integer *m, integer *n, integer *n1, + real *a, integer *lda, real *d__, real *sva, integer *mv, real *v, + integer *ldv, real *eps, real *sfmin, real *tol, integer *nsweep, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + real r__1, r__2; + + /* Local variables */ + integer nblc; + real aapp, aapq, aaqq; + integer nblr, ierr; + real bigtheta; + extern real sdot_(integer *, real *, integer *, real *, integer *); + integer pskipped; + real aapp0, temp1; + extern real snrm2_(integer *, real *, integer *); + integer i__, p, q; + real t, large, apoaq, aqoap; + extern logical lsame_(char *, char *); + real theta, small, fastr[5]; + logical applv, rsvec; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + logical rotok; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), saxpy_(integer *, real *, real *, integer *, real *, + integer *), srotm_(integer *, real *, integer *, real *, integer * + , real *); + real rootsfmin, cs, sn; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer ijblsk, swband; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + integer blskip; + real mxaapq, thsign; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + real mxsinj; + integer emptsw, notrot, iswrot, jbc; + real big; + integer kbl, igl, ibr, jgl, mvl; + real rootbig, rooteps; + integer rowskip; + real roottol; + + +/* -- LAPACK computational routine (version 3.8.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* November 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --sva; + --d__; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1 * 1; + v -= v_offset; + --work; + + /* Function Body */ + applv = lsame_(jobv, "A"); + rsvec = lsame_(jobv, "V"); + if (! (rsvec || applv || lsame_(jobv, "N"))) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || *n > *m) { + *info = -3; + } else if (*n1 < 0) { + *info = -4; + } else if (*lda < *m) { + *info = -6; + } else if ((rsvec || applv) && *mv < 0) { + *info = -9; + } else if (rsvec && *ldv < *n || applv && *ldv < *mv) { + *info = -11; + } else if (*tol <= *eps) { + *info = -14; + } else if (*nsweep < 0) { + *info = -15; + } else if (*lwork < *m) { + *info = -17; + } else { + *info = 0; + } + +/* #:( */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGSVJ1", &i__1, (ftnlen)6); + return 0; + } + + if (rsvec) { + mvl = *n; + } else if (applv) { + mvl = *mv; + } + rsvec = rsvec || applv; + rooteps = sqrt(*eps); + rootsfmin = sqrt(*sfmin); + small = *sfmin / *eps; + big = 1.f / *sfmin; + rootbig = 1.f / rootsfmin; + large = big / sqrt((real) (*m * *n)); + bigtheta = 1.f / rooteps; + roottol = sqrt(*tol); + + +/* RSVEC = LSAME( JOBV, 'Y' ) */ + + emptsw = *n1 * (*n - *n1); + notrot = 0; + fastr[0] = 0.f; + + + kbl = f2cmin(8,*n); + nblr = *n1 / kbl; + if (nblr * kbl != *n1) { + ++nblr; + } + nblc = (*n - *n1) / kbl; + if (nblc * kbl != *n - *n1) { + ++nblc; + } +/* Computing 2nd power */ + i__1 = kbl; + blskip = i__1 * i__1 + 1; +/* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */ + rowskip = f2cmin(5,kbl); +/* [TP] ROWSKIP is a tuning parameter. */ + swband = 0; +/* [TP] SWBAND is a tuning parameter. It is meaningful and effective */ +/* if SGESVJ is used as a computational routine in the preconditioned */ +/* Jacobi SVD algorithm SGESVJ. */ + + +/* | * * * [x] [x] [x]| */ +/* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. */ +/* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ +/* |[x] [x] [x] * * * | */ + + + i__1 = *nsweep; + for (i__ = 1; i__ <= i__1; ++i__) { + + mxaapq = 0.f; + mxsinj = 0.f; + iswrot = 0; + + notrot = 0; + pskipped = 0; + + i__2 = nblr; + for (ibr = 1; ibr <= i__2; ++ibr) { + igl = (ibr - 1) * kbl + 1; + + +/* ........................................................ */ +/* ... go to the off diagonal blocks */ + igl = (ibr - 1) * kbl + 1; + i__3 = nblc; + for (jbc = 1; jbc <= i__3; ++jbc) { + jgl = *n1 + (jbc - 1) * kbl + 1; +/* doing the block at ( ibr, jbc ) */ + ijblsk = 0; +/* Computing MIN */ + i__5 = igl + kbl - 1; + i__4 = f2cmin(i__5,*n1); + for (p = igl; p <= i__4; ++p) { + aapp = sva[p]; + if (aapp > 0.f) { + pskipped = 0; +/* Computing MIN */ + i__6 = jgl + kbl - 1; + i__5 = f2cmin(i__6,*n); + for (q = jgl; q <= i__5; ++q) { + + aaqq = sva[q]; + if (aaqq > 0.f) { + aapp0 = aapp; + + + + if (aaqq >= 1.f) { + if (aapp >= aaqq) { + rotok = small * aapp <= aaqq; + } else { + rotok = small * aaqq <= aapp; + } + if (aapp < big / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + scopy_(m, &a[p * a_dim1 + 1], &c__1, & + work[1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, & + d__[p], m, &c__1, &work[1], + lda, &ierr); + aapq = sdot_(m, &work[1], &c__1, &a[q + * a_dim1 + 1], &c__1) * d__[q] + / aaqq; + } + } else { + if (aapp >= aaqq) { + rotok = aapp <= aaqq / small; + } else { + rotok = aaqq <= aapp / small; + } + if (aapp > small / aaqq) { + aapq = sdot_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], & + c__1) * d__[p] * d__[q] / + aaqq / aapp; + } else { + scopy_(m, &a[q * a_dim1 + 1], &c__1, & + work[1], &c__1); + slascl_("G", &c__0, &c__0, &aaqq, & + d__[q], m, &c__1, &work[1], + lda, &ierr); + aapq = sdot_(m, &work[1], &c__1, &a[p + * a_dim1 + 1], &c__1) * d__[p] + / aapp; + } + } +/* Computing MAX */ + r__1 = mxaapq, r__2 = abs(aapq); + mxaapq = f2cmax(r__1,r__2); +/* TO rotate or NOT to rotate, THAT is the question ... */ + + if (abs(aapq) > *tol) { + notrot = 0; +/* ROTATED = ROTATED + 1 */ + pskipped = 0; + ++iswrot; + + if (rotok) { + + aqoap = aaqq / aapp; + apoaq = aapp / aaqq; + theta = (r__1 = aqoap - apoaq, abs( + r__1)) * -.5f / aapq; + if (aaqq > aapp0) { + theta = -theta; + } + if (abs(theta) > bigtheta) { + t = .5f / theta; + fastr[2] = t * d__[p] / d__[q]; + fastr[3] = -t * d__[q] / d__[p]; + srotm_(m, &a[p * a_dim1 + 1], & + c__1, &a[q * a_dim1 + 1], + &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * + v_dim1 + 1], &c__1, fastr); + } +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(t); + mxsinj = f2cmax(r__1,r__2); + } else { + + + thsign = -r_sign(&c_b35, &aapq); + if (aaqq > aapp0) { + thsign = -thsign; + } + t = 1.f / (theta + thsign * sqrt( + theta * theta + 1.f)); + cs = sqrt(1.f / (t * t + 1.f)); + sn = t * cs; +/* Computing MAX */ + r__1 = mxsinj, r__2 = abs(sn); + mxsinj = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = 0.f, r__2 = t * apoaq * + aapq + 1.f; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - t * + aqoap * aapq; + aapp *= sqrt((f2cmax(r__1,r__2))); + apoaq = d__[p] / d__[q]; + aqoap = d__[q] / d__[p]; + if (d__[p] >= 1.f) { + + if (d__[q] >= 1.f) { + fastr[2] = t * apoaq; + fastr[3] = -t * aqoap; + d__[p] *= cs; + d__[q] *= cs; + srotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * + a_dim1 + 1], &c__1, fastr); + if (rsvec) { + srotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[ + q * v_dim1 + 1], &c__1, fastr); + } + } else { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + } + d__[p] *= cs; + d__[q] /= cs; + } + } else { + if (d__[q] >= 1.f) { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, &a[ + q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, &a[ + p * a_dim1 + 1], &c__1); + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], & + c__1, &v[q * v_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], & + c__1, &v[p * v_dim1 + 1], &c__1); + } + d__[p] /= cs; + d__[q] *= cs; + } else { + if (d__[p] >= d__[q]) { + r__1 = -t * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + r__1 = cs * sn * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + d__[p] *= cs; + d__[q] /= cs; + if (rsvec) { + r__1 = -t * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + r__1 = cs * sn * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } else { + r__1 = t * apoaq; + saxpy_(m, &r__1, &a[p * a_dim1 + 1], &c__1, + &a[q * a_dim1 + 1], &c__1); + r__1 = -cs * sn * aqoap; + saxpy_(m, &r__1, &a[q * a_dim1 + 1], &c__1, + &a[p * a_dim1 + 1], &c__1); + d__[p] /= cs; + d__[q] *= cs; + if (rsvec) { + r__1 = t * apoaq; + saxpy_(&mvl, &r__1, &v[p * v_dim1 + 1], + &c__1, &v[q * v_dim1 + 1], & + c__1); + r__1 = -cs * sn * aqoap; + saxpy_(&mvl, &r__1, &v[q * v_dim1 + 1], + &c__1, &v[p * v_dim1 + 1], & + c__1); + } + } + } + } + } + } else { + if (aapp > aaqq) { + scopy_(m, &a[p * a_dim1 + 1], & + c__1, &work[1], &c__1); + slascl_("G", &c__0, &c__0, &aapp, + &c_b35, m, &c__1, &work[1] + , lda, &ierr); + slascl_("G", &c__0, &c__0, &aaqq, + &c_b35, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[p] / d__[q]; + saxpy_(m, &temp1, &work[1], &c__1, + &a[q * a_dim1 + 1], & + c__1); + slascl_("G", &c__0, &c__0, &c_b35, + &aaqq, m, &c__1, &a[q * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * + aapq; + sva[q] = aaqq * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } else { + scopy_(m, &a[q * a_dim1 + 1], & + c__1, &work[1], &c__1); + slascl_("G", &c__0, &c__0, &aaqq, + &c_b35, m, &c__1, &work[1] + , lda, &ierr); + slascl_("G", &c__0, &c__0, &aapp, + &c_b35, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); + temp1 = -aapq * d__[q] / d__[p]; + saxpy_(m, &temp1, &work[1], &c__1, + &a[p * a_dim1 + 1], & + c__1); + slascl_("G", &c__0, &c__0, &c_b35, + &aapp, m, &c__1, &a[p * + a_dim1 + 1], lda, &ierr); +/* Computing MAX */ + r__1 = 0.f, r__2 = 1.f - aapq * + aapq; + sva[p] = aapp * sqrt((f2cmax(r__1, + r__2))); + mxsinj = f2cmax(mxsinj,*sfmin); + } + } +/* END IF ROTOK THEN ... ELSE */ + +/* In the case of cancellation in updating SVA(q) */ +/* Computing 2nd power */ + r__1 = sva[q] / aaqq; + if (r__1 * r__1 <= rooteps) { + if (aaqq < rootbig && aaqq > + rootsfmin) { + sva[q] = snrm2_(m, &a[q * a_dim1 + + 1], &c__1) * d__[q]; + } else { + t = 0.f; + aaqq = 1.f; + slassq_(m, &a[q * a_dim1 + 1], & + c__1, &t, &aaqq); + sva[q] = t * sqrt(aaqq) * d__[q]; + } + } +/* Computing 2nd power */ + r__1 = aapp / aapp0; + if (r__1 * r__1 <= rooteps) { + if (aapp < rootbig && aapp > + rootsfmin) { + aapp = snrm2_(m, &a[p * a_dim1 + + 1], &c__1) * d__[p]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[p * a_dim1 + 1], & + c__1, &t, &aapp); + aapp = t * sqrt(aapp) * d__[p]; + } + sva[p] = aapp; + } +/* end of OK rotation */ + } else { + ++notrot; +/* SKIPPED = SKIPPED + 1 */ + ++pskipped; + ++ijblsk; + } + } else { + ++notrot; + ++pskipped; + ++ijblsk; + } +/* IF ( NOTROT .GE. EMPTSW ) GO TO 2011 */ + if (i__ <= swband && ijblsk >= blskip) { + sva[p] = aapp; + notrot = 0; + goto L2011; + } + if (i__ <= swband && pskipped > rowskip) { + aapp = -aapp; + notrot = 0; + goto L2203; + } + +/* L2200: */ + } +/* end of the q-loop */ +L2203: + sva[p] = aapp; + + } else { + if (aapp == 0.f) { +/* Computing MIN */ + i__5 = jgl + kbl - 1; + notrot = notrot + f2cmin(i__5,*n) - jgl + 1; + } + if (aapp < 0.f) { + notrot = 0; + } +/* ** IF ( NOTROT .GE. EMPTSW ) GO TO 2011 */ + } +/* L2100: */ + } +/* end of the p-loop */ +/* L2010: */ + } +/* end of the jbc-loop */ +L2011: +/* 2011 bailed out of the jbc-loop */ +/* Computing MIN */ + i__4 = igl + kbl - 1; + i__3 = f2cmin(i__4,*n); + for (p = igl; p <= i__3; ++p) { + sva[p] = (r__1 = sva[p], abs(r__1)); +/* L2012: */ + } +/* ** IF ( NOTROT .GE. EMPTSW ) GO TO 1994 */ +/* L2000: */ + } +/* 2000 :: end of the ibr-loop */ + + if (sva[*n] < rootbig && sva[*n] > rootsfmin) { + sva[*n] = snrm2_(m, &a[*n * a_dim1 + 1], &c__1) * d__[*n]; + } else { + t = 0.f; + aapp = 1.f; + slassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp); + sva[*n] = t * sqrt(aapp) * d__[*n]; + } + +/* Additional steering devices */ + + if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) { + swband = i__; + } + if (i__ > swband + 1 && mxaapq < (real) (*n) * *tol && (real) (*n) * + mxaapq * mxsinj < *tol) { + goto L1994; + } + + if (notrot >= emptsw) { + goto L1994; + } +/* L1993: */ + } +/* end i=1:NSWEEP loop */ +/* #:) Reaching this point means that the procedure has completed the given */ +/* number of sweeps. */ + *info = *nsweep - 1; + goto L1995; +L1994: +/* #:) Reaching this point means that during the i-th sweep all pivots were */ +/* below the given threshold, causing early exit. */ + *info = 0; +/* #:) INFO = 0 confirms successful iterations. */ +L1995: + +/* Sort the vector D */ + + i__1 = *n - 1; + for (p = 1; p <= i__1; ++p) { + i__2 = *n - p + 1; + q = isamax_(&i__2, &sva[p], &c__1) + p - 1; + if (p != q) { + temp1 = sva[p]; + sva[p] = sva[q]; + sva[q] = temp1; + temp1 = d__[p]; + d__[p] = d__[q]; + d__[q] = temp1; + sswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1); + if (rsvec) { + sswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], & + c__1); + } + } +/* L5991: */ + } + + return 0; +} /* sgsvj1_ */ + diff --git a/lapack-netlib/SRC/sgtcon.c b/lapack-netlib/SRC/sgtcon.c new file mode 100644 index 000000000..32e36623b --- /dev/null +++ b/lapack-netlib/SRC/sgtcon.c @@ -0,0 +1,649 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGTCON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGTCON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER NORM */ +/* INTEGER INFO, N */ +/* REAL ANORM, RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGTCON estimates the reciprocal of the condition number of a real */ +/* > tridiagonal matrix A using the LU factorization as computed by */ +/* > SGTTRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies whether the 1-norm condition number or the */ +/* > infinity-norm condition number is required: */ +/* > = '1' or 'O': 1-norm; */ +/* > = 'I': Infinity-norm. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is REAL array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A as computed by SGTTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is REAL array, dimension (N-1) */ +/* > The (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is REAL array, dimension (N-2) */ +/* > The (n-2) elements of the second superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is REAL */ +/* > If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* > If NORM = 'I', the infinity-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an */ +/* > estimate of the 1-norm of inv(A) computed in this routine. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (2*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgtcon_(char *norm, integer *n, real *dl, real *d__, + real *du, real *du2, integer *ipiv, real *anorm, real *rcond, real * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer kase, kase1, i__; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, + real *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + real ainvnm; + logical onenrm; + extern /* Subroutine */ int sgttrs_(char *, integer *, integer *, real *, + real *, real *, real *, integer *, real *, integer *, integer *); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input arguments. */ + + /* Parameter adjustments */ + --iwork; + --work; + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); + if (! onenrm && ! lsame_(norm, "I")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*anorm < 0.f) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGTCON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.f; + if (*n == 0) { + *rcond = 1.f; + return 0; + } else if (*anorm == 0.f) { + return 0; + } + +/* Check that D(1:N) is non-zero. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] == 0.f) { + return 0; + } +/* L10: */ + } + + ainvnm = 0.f; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L20: + slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(U)*inv(L). */ + + sgttrs_("No transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1] + , &ipiv[1], &work[1], n, info); + } else { + +/* Multiply by inv(L**T)*inv(U**T). */ + + sgttrs_("Transpose", n, &c__1, &dl[1], &d__[1], &du[1], &du2[1], & + ipiv[1], &work[1], n, info); + } + goto L20; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + *rcond = 1.f / ainvnm / *anorm; + } + + return 0; + +/* End of SGTCON */ + +} /* sgtcon_ */ + diff --git a/lapack-netlib/SRC/sgtrfs.c b/lapack-netlib/SRC/sgtrfs.c new file mode 100644 index 000000000..6135ed771 --- /dev/null +++ b/lapack-netlib/SRC/sgtrfs.c @@ -0,0 +1,913 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGTRFS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGTRFS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, */ +/* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDB, LDX, N, NRHS */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), */ +/* $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), */ +/* $ FERR( * ), WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGTRFS improves the computed solution to a system of linear */ +/* > equations when the coefficient matrix is tridiagonal, and provides */ +/* > error bounds and backward error estimates for the solution. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is REAL array, dimension (N-1) */ +/* > The (n-1) subdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The diagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is REAL array, dimension (N-1) */ +/* > The (n-1) superdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DLF */ +/* > \verbatim */ +/* > DLF is REAL array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A as computed by SGTTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DF */ +/* > \verbatim */ +/* > DF is REAL array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DUF */ +/* > \verbatim */ +/* > DUF is REAL array, dimension (N-1) */ +/* > The (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is REAL array, dimension (N-2) */ +/* > The (n-2) elements of the second superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > The right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (LDX,NRHS) */ +/* > On entry, the solution matrix X, as computed by SGTTRS. */ +/* > On exit, the improved solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > ITMAX is the maximum number of steps of iterative refinement. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgtrfs_(char *trans, integer *n, integer *nrhs, real *dl, + real *d__, real *du, real *dlf, real *df, real *duf, real *du2, + integer *ipiv, real *b, integer *ldb, real *x, integer *ldx, real * + ferr, real *berr, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + integer kase; + real safe1, safe2; + integer i__, j; + real s; + extern logical lsame_(char *, char *); + integer isave[3], count; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), saxpy_(integer *, real *, real *, integer *, real *, + integer *), slacn2_(integer *, real *, real *, integer *, real *, + integer *, integer *); + extern real slamch_(char *); + integer nz; + real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), slagtm_( + char *, integer *, integer *, real *, real *, real *, real *, + real *, integer *, real *, real *, integer *); + logical notran; + char transn[1], transt[1]; + real lstres; + extern /* Subroutine */ int sgttrs_(char *, integer *, integer *, real *, + real *, real *, real *, integer *, real *, integer *, integer *); + real eps; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < f2cmax(1,*n)) { + *info = -13; + } else if (*ldx < f2cmax(1,*n)) { + *info = -15; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGTRFS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + ferr[j] = 0.f; + berr[j] = 0.f; +/* L10: */ + } + return 0; + } + + if (notran) { + *(unsigned char *)transn = 'N'; + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transn = 'T'; + *(unsigned char *)transt = 'N'; + } + +/* NZ = maximum number of nonzero elements in each row of A, plus 1 */ + + nz = 4; + eps = slamch_("Epsilon"); + safmin = slamch_("Safe minimum"); + safe1 = nz * safmin; + safe2 = safe1 / eps; + +/* Do for each right hand side */ + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + + count = 1; + lstres = 3.f; +L20: + +/* Loop until stopping criterion is satisfied. */ + +/* Compute residual R = B - op(A) * X, */ +/* where op(A) = A, A**T, or A**H, depending on TRANS. */ + + scopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1); + slagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x[j * + x_dim1 + 1], ldx, &c_b19, &work[*n + 1], n); + +/* Compute abs(op(A))*abs(x) + abs(b) for use in the backward */ +/* error bound. */ + + if (notran) { + if (*n == 1) { + work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(r__2)); + } else { + work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(r__2)) + (r__3 = du[1] * + x[j * x_dim1 + 2], abs(r__3)); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)) + ( + r__2 = dl[i__ - 1] * x[i__ - 1 + j * x_dim1], abs( + r__2)) + (r__3 = d__[i__] * x[i__ + j * x_dim1], + abs(r__3)) + (r__4 = du[i__] * x[i__ + 1 + j * + x_dim1], abs(r__4)); +/* L30: */ + } + work[*n] = (r__1 = b[*n + j * b_dim1], abs(r__1)) + (r__2 = + dl[*n - 1] * x[*n - 1 + j * x_dim1], abs(r__2)) + ( + r__3 = d__[*n] * x[*n + j * x_dim1], abs(r__3)); + } + } else { + if (*n == 1) { + work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(r__2)); + } else { + work[1] = (r__1 = b[j * b_dim1 + 1], abs(r__1)) + (r__2 = d__[ + 1] * x[j * x_dim1 + 1], abs(r__2)) + (r__3 = dl[1] * + x[j * x_dim1 + 2], abs(r__3)); + i__2 = *n - 1; + for (i__ = 2; i__ <= i__2; ++i__) { + work[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)) + ( + r__2 = du[i__ - 1] * x[i__ - 1 + j * x_dim1], abs( + r__2)) + (r__3 = d__[i__] * x[i__ + j * x_dim1], + abs(r__3)) + (r__4 = dl[i__] * x[i__ + 1 + j * + x_dim1], abs(r__4)); +/* L40: */ + } + work[*n] = (r__1 = b[*n + j * b_dim1], abs(r__1)) + (r__2 = + du[*n - 1] * x[*n - 1 + j * x_dim1], abs(r__2)) + ( + r__3 = d__[*n] * x[*n + j * x_dim1], abs(r__3)); + } + } + +/* Compute componentwise relative backward error from formula */ + +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */ + +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. If the i-th component of the denominator is less */ +/* than SAFE2, then SAFE1 is added to the i-th components of the */ +/* numerator and denominator before dividing. */ + + s = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { +/* Computing MAX */ + r__2 = s, r__3 = (r__1 = work[*n + i__], abs(r__1)) / work[ + i__]; + s = f2cmax(r__2,r__3); + } else { +/* Computing MAX */ + r__2 = s, r__3 = ((r__1 = work[*n + i__], abs(r__1)) + safe1) + / (work[i__] + safe1); + s = f2cmax(r__2,r__3); + } +/* L50: */ + } + berr[j] = s; + +/* Test stopping criterion. Continue iterating if */ +/* 1) The residual BERR(J) is larger than machine epsilon, and */ +/* 2) BERR(J) decreased by at least a factor of 2 during the */ +/* last iteration, and */ +/* 3) At most ITMAX iterations tried. */ + + if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) { + +/* Update solution and try again. */ + + sgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[ + 1], &work[*n + 1], n, info); + saxpy_(n, &c_b19, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1) + ; + lstres = berr[j]; + ++count; + goto L20; + } + +/* Bound error from formula */ + +/* norm(X - XTRUE) / norm(X) .le. FERR = */ +/* norm( abs(inv(op(A)))* */ +/* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */ + +/* where */ +/* norm(Z) is the magnitude of the largest component of Z */ +/* inv(op(A)) is the inverse of op(A) */ +/* abs(Z) is the componentwise absolute value of the matrix or */ +/* vector Z */ +/* NZ is the maximum number of nonzeros in any row of A, plus 1 */ +/* EPS is machine epsilon */ + +/* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */ +/* is incremented by SAFE1 if the i-th component of */ +/* abs(op(A))*abs(X) + abs(B) is less than SAFE2. */ + +/* Use SLACN2 to estimate the infinity-norm of the matrix */ +/* inv(op(A)) * diag(W), */ +/* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + if (work[i__] > safe2) { + work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * + work[i__]; + } else { + work[i__] = (r__1 = work[*n + i__], abs(r__1)) + nz * eps * + work[i__] + safe1; + } +/* L60: */ + } + + kase = 0; +L70: + slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & + kase, isave); + if (kase != 0) { + if (kase == 1) { + +/* Multiply by diag(W)*inv(op(A)**T). */ + + sgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[*n + 1], n, info); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L80: */ + } + } else { + +/* Multiply by inv(op(A))*diag(W). */ + + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + work[*n + i__] = work[i__] * work[*n + i__]; +/* L90: */ + } + sgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & + ipiv[1], &work[*n + 1], n, info); + } + goto L70; + } + +/* Normalize error. */ + + lstres = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], abs(r__1)); + lstres = f2cmax(r__2,r__3); +/* L100: */ + } + if (lstres != 0.f) { + ferr[j] /= lstres; + } + +/* L110: */ + } + + return 0; + +/* End of SGTRFS */ + +} /* sgtrfs_ */ + diff --git a/lapack-netlib/SRC/sgtsv.c b/lapack-netlib/SRC/sgtsv.c new file mode 100644 index 000000000..3bf62e8a0 --- /dev/null +++ b/lapack-netlib/SRC/sgtsv.c @@ -0,0 +1,745 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGTSV computes the solution to system of linear equations A * X = B for GT matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGTSV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) */ + +/* INTEGER INFO, LDB, N, NRHS */ +/* REAL B( LDB, * ), D( * ), DL( * ), DU( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGTSV solves the equation */ +/* > */ +/* > A*X = B, */ +/* > */ +/* > where A is an n by n tridiagonal matrix, by Gaussian elimination with */ +/* > partial pivoting. */ +/* > */ +/* > Note that the equation A**T*X = B may be solved by interchanging the */ +/* > order of the arguments DU and DL. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DL */ +/* > \verbatim */ +/* > DL is REAL array, dimension (N-1) */ +/* > On entry, DL must contain the (n-1) sub-diagonal elements of */ +/* > A. */ +/* > */ +/* > On exit, DL is overwritten by the (n-2) elements of the */ +/* > second super-diagonal of the upper triangular matrix U from */ +/* > the LU factorization of A, in DL(1), ..., DL(n-2). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > On entry, D must contain the diagonal elements of A. */ +/* > */ +/* > On exit, D is overwritten by the n diagonal elements of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU */ +/* > \verbatim */ +/* > DU is REAL array, dimension (N-1) */ +/* > On entry, DU must contain the (n-1) super-diagonal elements */ +/* > of A. */ +/* > */ +/* > On exit, DU is overwritten by the (n-1) elements of the first */ +/* > super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the N by NRHS matrix of right hand side matrix B. */ +/* > On exit, if INFO = 0, the N by NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero, and the solution */ +/* > has not been computed. The factorization has not been */ +/* > completed unless i = N. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGTsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgtsv_(integer *n, integer *nrhs, real *dl, real *d__, + real *du, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + real fact, temp; + integer i__, j; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*ldb < f2cmax(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGTSV ", &i__1, (ftnlen)5); + return 0; + } + + if (*n == 0) { + return 0; + } + + if (*nrhs == 1) { + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) { + +/* No row interchange required */ + + if (d__[i__] != 0.f) { + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1]; + } else { + *info = i__; + return 0; + } + dl[i__] = 0.f; + } else { + +/* Interchange rows I and I+1 */ + + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + dl[i__] = du[i__ + 1]; + du[i__ + 1] = -fact * dl[i__]; + du[i__] = temp; + temp = b[i__ + b_dim1]; + b[i__ + b_dim1] = b[i__ + 1 + b_dim1]; + b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1]; + } +/* L10: */ + } + if (*n > 1) { + i__ = *n - 1; + if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) { + if (d__[i__] != 0.f) { + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + b[i__ + 1 + b_dim1] -= fact * b[i__ + b_dim1]; + } else { + *info = i__; + return 0; + } + } else { + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + du[i__] = temp; + temp = b[i__ + b_dim1]; + b[i__ + b_dim1] = b[i__ + 1 + b_dim1]; + b[i__ + 1 + b_dim1] = temp - fact * b[i__ + 1 + b_dim1]; + } + } + if (d__[*n] == 0.f) { + *info = *n; + return 0; + } + } else { + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) { + +/* No row interchange required */ + + if (d__[i__] != 0.f) { + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1]; +/* L20: */ + } + } else { + *info = i__; + return 0; + } + dl[i__] = 0.f; + } else { + +/* Interchange rows I and I+1 */ + + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + dl[i__] = du[i__ + 1]; + du[i__ + 1] = -fact * dl[i__]; + du[i__] = temp; + i__2 = *nrhs; + for (j = 1; j <= i__2; ++j) { + temp = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * + b_dim1]; +/* L30: */ + } + } +/* L40: */ + } + if (*n > 1) { + i__ = *n - 1; + if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) { + if (d__[i__] != 0.f) { + fact = dl[i__] / d__[i__]; + d__[i__ + 1] -= fact * du[i__]; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + b[i__ + 1 + j * b_dim1] -= fact * b[i__ + j * b_dim1]; +/* L50: */ + } + } else { + *info = i__; + return 0; + } + } else { + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + temp = d__[i__ + 1]; + d__[i__ + 1] = du[i__] - fact * temp; + du[i__] = temp; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + temp = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp - fact * b[i__ + 1 + j * + b_dim1]; +/* L60: */ + } + } + } + if (d__[*n] == 0.f) { + *info = *n; + return 0; + } + } + +/* Back solve with the matrix U from the factorization. */ + + if (*nrhs <= 2) { + j = 1; +L70: + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] * b[ + *n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + 1 + + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) / d__[ + i__]; +/* L80: */ + } + if (j < *nrhs) { + ++j; + goto L70; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] + * b[*n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + + 1 + j * b_dim1] - dl[i__] * b[i__ + 2 + j * b_dim1]) + / d__[i__]; +/* L90: */ + } +/* L100: */ + } + } + + return 0; + +/* End of SGTSV */ + +} /* sgtsv_ */ + diff --git a/lapack-netlib/SRC/sgtsvx.c b/lapack-netlib/SRC/sgtsvx.c new file mode 100644 index 000000000..b0f304eb3 --- /dev/null +++ b/lapack-netlib/SRC/sgtsvx.c @@ -0,0 +1,828 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief SGTSVX computes the solution to system of linear equations A * X = B for GT matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGTSVX + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, */ +/* DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, */ +/* WORK, IWORK, INFO ) */ + +/* CHARACTER FACT, TRANS */ +/* INTEGER INFO, LDB, LDX, N, NRHS */ +/* REAL RCOND */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), */ +/* $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), */ +/* $ FERR( * ), WORK( * ), X( LDX, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGTSVX uses the LU factorization to compute the solution to a real */ +/* > system of linear equations A * X = B or A**T * X = B, */ +/* > where A is a tridiagonal matrix of order N and X and B are N-by-NRHS */ +/* > matrices. */ +/* > */ +/* > Error bounds on the solution and a condition estimate are also */ +/* > provided. */ +/* > \endverbatim */ + +/* > \par Description: */ +/* ================= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following steps are performed: */ +/* > */ +/* > 1. If FACT = 'N', the LU decomposition is used to factor the matrix A */ +/* > as A = L * U, where L is a product of permutation and unit lower */ +/* > bidiagonal matrices and U is upper triangular with nonzeros in */ +/* > only the main diagonal and first two superdiagonals. */ +/* > */ +/* > 2. If some U(i,i)=0, so that U is exactly singular, then the routine */ +/* > returns with INFO = i. Otherwise, the factored form of A is used */ +/* > to estimate the condition number of the matrix A. If the */ +/* > reciprocal of the condition number is less than machine precision, */ +/* > INFO = N+1 is returned as a warning, but the routine still goes on */ +/* > to solve for X and compute error bounds as described below. */ +/* > */ +/* > 3. The system of equations is solved for X using the factored form */ +/* > of A. */ +/* > */ +/* > 4. Iterative refinement is applied to improve the computed solution */ +/* > matrix and calculate error bounds and backward error estimates */ +/* > for it. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] FACT */ +/* > \verbatim */ +/* > FACT is CHARACTER*1 */ +/* > Specifies whether or not the factored form of A has been */ +/* > supplied on entry. */ +/* > = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored */ +/* > form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV */ +/* > will not be modified. */ +/* > = 'N': The matrix will be copied to DLF, DF, and DUF */ +/* > and factored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is REAL array, dimension (N-1) */ +/* > The (n-1) subdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The n diagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is REAL array, dimension (N-1) */ +/* > The (n-1) superdiagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DLF */ +/* > \verbatim */ +/* > DLF is REAL array, dimension (N-1) */ +/* > If FACT = 'F', then DLF is an input argument and on entry */ +/* > contains the (n-1) multipliers that define the matrix L from */ +/* > the LU factorization of A as computed by SGTTRF. */ +/* > */ +/* > If FACT = 'N', then DLF is an output argument and on exit */ +/* > contains the (n-1) multipliers that define the matrix L from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DF */ +/* > \verbatim */ +/* > DF is REAL array, dimension (N) */ +/* > If FACT = 'F', then DF is an input argument and on entry */ +/* > contains the n diagonal elements of the upper triangular */ +/* > matrix U from the LU factorization of A. */ +/* > */ +/* > If FACT = 'N', then DF is an output argument and on exit */ +/* > contains the n diagonal elements of the upper triangular */ +/* > matrix U from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DUF */ +/* > \verbatim */ +/* > DUF is REAL array, dimension (N-1) */ +/* > If FACT = 'F', then DUF is an input argument and on entry */ +/* > contains the (n-1) elements of the first superdiagonal of U. */ +/* > */ +/* > If FACT = 'N', then DUF is an output argument and on exit */ +/* > contains the (n-1) elements of the first superdiagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU2 */ +/* > \verbatim */ +/* > DU2 is REAL array, dimension (N-2) */ +/* > If FACT = 'F', then DU2 is an input argument and on entry */ +/* > contains the (n-2) elements of the second superdiagonal of */ +/* > U. */ +/* > */ +/* > If FACT = 'N', then DU2 is an output argument and on exit */ +/* > contains the (n-2) elements of the second superdiagonal of */ +/* > U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > If FACT = 'F', then IPIV is an input argument and on entry */ +/* > contains the pivot indices from the LU factorization of A as */ +/* > computed by SGTTRF. */ +/* > */ +/* > If FACT = 'N', then IPIV is an output argument and on exit */ +/* > contains the pivot indices from the LU factorization of A; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > IPIV(i) will always be either i or i+1; IPIV(i) = i indicates */ +/* > a row interchange was not required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > The N-by-NRHS right hand side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is REAL array, dimension (LDX,NRHS) */ +/* > If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > The estimate of the reciprocal condition number of the matrix */ +/* > A. If RCOND is less than the machine precision (in */ +/* > particular, if RCOND = 0), the matrix is singular to working */ +/* > precision. This condition is indicated by a return code of */ +/* > INFO > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] FERR */ +/* > \verbatim */ +/* > FERR is REAL array, dimension (NRHS) */ +/* > The estimated forward error bound for each solution vector */ +/* > X(j) (the j-th column of the solution matrix X). */ +/* > If XTRUE is the true solution corresponding to X(j), FERR(j) */ +/* > is an estimated upper bound for the magnitude of the largest */ +/* > element in (X(j) - XTRUE) divided by the magnitude of the */ +/* > largest element in X(j). The estimate is as reliable as */ +/* > the estimate for RCOND, and is almost always a slight */ +/* > overestimate of the true error. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR */ +/* > \verbatim */ +/* > BERR is REAL array, dimension (NRHS) */ +/* > The componentwise relative backward error of each solution */ +/* > vector X(j) (i.e., the smallest relative change in */ +/* > any element of A or B that makes X(j) an exact solution). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, and i is */ +/* > <= N: U(i,i) is exactly zero. The factorization */ +/* > has not been completed unless i = N, but the */ +/* > factor U is exactly singular, so the solution */ +/* > and error bounds could not be computed. */ +/* > RCOND = 0 is returned. */ +/* > = N+1: U is nonsingular, but RCOND is less than machine */ +/* > precision, meaning that the matrix is singular */ +/* > to working precision. Nevertheless, the */ +/* > solution and error bounds are computed because */ +/* > there are a number of situations where the */ +/* > computed solution can be more accurate than the */ +/* > value of RCOND would suggest. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGTsolve */ + +/* ===================================================================== */ +/* Subroutine */ int sgtsvx_(char *fact, char *trans, integer *n, integer * + nrhs, real *dl, real *d__, real *du, real *dlf, real *df, real *duf, + real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer * + ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, x_dim1, x_offset, i__1; + + /* Local variables */ + char norm[1]; + extern logical lsame_(char *, char *); + real anorm; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern real slamch_(char *); + logical nofact; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern real slangt_(char *, integer *, real *, real *, real *); + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), sgtcon_(char *, integer *, + real *, real *, real *, real *, integer *, real *, real *, real *, + integer *, integer *); + logical notran; + extern /* Subroutine */ int sgtrfs_(char *, integer *, integer *, real *, + real *, real *, real *, real *, real *, real *, integer *, real *, + integer *, real *, integer *, real *, real *, real *, integer *, + integer *), sgttrf_(integer *, real *, real *, real *, + real *, integer *, integer *), sgttrs_(char *, integer *, integer + *, real *, real *, real *, real *, integer *, real *, integer *, + integer *); + + +/* -- LAPACK driver routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --dlf; + --df; + --duf; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1 * 1; + x -= x_offset; + --ferr; + --berr; + --work; + --iwork; + + /* Function Body */ + *info = 0; + nofact = lsame_(fact, "N"); + notran = lsame_(trans, "N"); + if (! nofact && ! lsame_(fact, "F")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*nrhs < 0) { + *info = -4; + } else if (*ldb < f2cmax(1,*n)) { + *info = -14; + } else if (*ldx < f2cmax(1,*n)) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGTSVX", &i__1, (ftnlen)6); + return 0; + } + + if (nofact) { + +/* Compute the LU factorization of A. */ + + scopy_(n, &d__[1], &c__1, &df[1], &c__1); + if (*n > 1) { + i__1 = *n - 1; + scopy_(&i__1, &dl[1], &c__1, &dlf[1], &c__1); + i__1 = *n - 1; + scopy_(&i__1, &du[1], &c__1, &duf[1], &c__1); + } + sgttrf_(n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], info); + +/* Return if INFO is non-zero. */ + + if (*info > 0) { + *rcond = 0.f; + return 0; + } + } + +/* Compute the norm of the matrix A. */ + + if (notran) { + *(unsigned char *)norm = '1'; + } else { + *(unsigned char *)norm = 'I'; + } + anorm = slangt_(norm, n, &dl[1], &d__[1], &du[1]); + +/* Compute the reciprocal of the condition number of A. */ + + sgtcon_(norm, n, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &anorm, + rcond, &work[1], &iwork[1], info); + +/* Compute the solution vectors X. */ + + slacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); + sgttrs_(trans, n, nrhs, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[1], &x[ + x_offset], ldx, info); + +/* Use iterative refinement to improve the computed solutions and */ +/* compute error bounds and backward error estimates for them. */ + + sgtrfs_(trans, n, nrhs, &dl[1], &d__[1], &du[1], &dlf[1], &df[1], &duf[1], + &du2[1], &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1] + , &berr[1], &work[1], &iwork[1], info); + +/* Set INFO = N+1 if the matrix is singular to working precision. */ + + if (*rcond < slamch_("Epsilon")) { + *info = *n + 1; + } + + return 0; + +/* End of SGTSVX */ + +} /* sgtsvx_ */ + diff --git a/lapack-netlib/SRC/sgttrf.c b/lapack-netlib/SRC/sgttrf.c new file mode 100644 index 000000000..ba43b3902 --- /dev/null +++ b/lapack-netlib/SRC/sgttrf.c @@ -0,0 +1,632 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGTTRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGTTRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) */ + +/* INTEGER INFO, N */ +/* INTEGER IPIV( * ) */ +/* REAL D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGTTRF computes an LU factorization of a real tridiagonal matrix A */ +/* > using elimination with partial pivoting and row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = L * U */ +/* > where L is a product of permutation and unit lower bidiagonal */ +/* > matrices and U is upper triangular with nonzeros in only the main */ +/* > diagonal and first two superdiagonals. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DL */ +/* > \verbatim */ +/* > DL is REAL array, dimension (N-1) */ +/* > On entry, DL must contain the (n-1) sub-diagonal elements of */ +/* > A. */ +/* > */ +/* > On exit, DL is overwritten by the (n-1) multipliers that */ +/* > define the matrix L from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > On entry, D must contain the diagonal elements of A. */ +/* > */ +/* > On exit, D is overwritten by the n diagonal elements of the */ +/* > upper triangular matrix U from the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DU */ +/* > \verbatim */ +/* > DU is REAL array, dimension (N-1) */ +/* > On entry, DU must contain the (n-1) super-diagonal elements */ +/* > of A. */ +/* > */ +/* > On exit, DU is overwritten by the (n-1) elements of the first */ +/* > super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DU2 */ +/* > \verbatim */ +/* > DU2 is REAL array, dimension (N-2) */ +/* > On exit, DU2 is overwritten by the (n-2) elements of the */ +/* > second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgttrf_(integer *n, real *dl, real *d__, real *du, real * + du2, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Local variables */ + real fact, temp; + integer i__; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --ipiv; + --du2; + --du; + --d__; + --dl; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("SGTTRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Initialize IPIV(i) = i and DU2(I) = 0 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + ipiv[i__] = i__; +/* L10: */ + } + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + du2[i__] = 0.f; +/* L20: */ + } + + i__1 = *n - 2; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) { + +/* No row interchange required, eliminate DL(I) */ + + if (d__[i__] != 0.f) { + fact = dl[i__] / d__[i__]; + dl[i__] = fact; + d__[i__ + 1] -= fact * du[i__]; + } + } else { + +/* Interchange rows I and I+1, eliminate DL(I) */ + + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + dl[i__] = fact; + temp = du[i__]; + du[i__] = d__[i__ + 1]; + d__[i__ + 1] = temp - fact * d__[i__ + 1]; + du2[i__] = du[i__ + 1]; + du[i__ + 1] = -fact * du[i__ + 1]; + ipiv[i__] = i__ + 1; + } +/* L30: */ + } + if (*n > 1) { + i__ = *n - 1; + if ((r__1 = d__[i__], abs(r__1)) >= (r__2 = dl[i__], abs(r__2))) { + if (d__[i__] != 0.f) { + fact = dl[i__] / d__[i__]; + dl[i__] = fact; + d__[i__ + 1] -= fact * du[i__]; + } + } else { + fact = d__[i__] / dl[i__]; + d__[i__] = dl[i__]; + dl[i__] = fact; + temp = du[i__]; + du[i__] = d__[i__ + 1]; + d__[i__ + 1] = temp - fact * d__[i__ + 1]; + ipiv[i__] = i__ + 1; + } + } + +/* Check for a zero on the diagonal of U. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] == 0.f) { + *info = i__; + goto L50; + } +/* L40: */ + } +L50: + + return 0; + +/* End of SGTTRF */ + +} /* sgttrf_ */ + diff --git a/lapack-netlib/SRC/sgttrs.c b/lapack-netlib/SRC/sgttrs.c new file mode 100644 index 000000000..e616c7134 --- /dev/null +++ b/lapack-netlib/SRC/sgttrs.c @@ -0,0 +1,632 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGTTRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGTTRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, */ +/* INFO ) */ + +/* CHARACTER TRANS */ +/* INTEGER INFO, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGTTRS solves one of the systems of equations */ +/* > A*X = B or A**T*X = B, */ +/* > with a tridiagonal matrix A using the LU factorization computed */ +/* > by SGTTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations. */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T* X = B (Transpose) */ +/* > = 'C': A**T* X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is REAL array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is REAL array, dimension (N-1) */ +/* > The (n-1) elements of the first super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is REAL array, dimension (N-2) */ +/* > The (n-2) elements of the second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the matrix of right hand side vectors B. */ +/* > On exit, B is overwritten by the solution vectors X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgttrs_(char *trans, integer *n, integer *nrhs, real *dl, + real *d__, real *du, real *du2, integer *ipiv, real *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer j, jb, nb; + extern /* Subroutine */ int sgtts2_(integer *, integer *, integer *, real + *, real *, real *, real *, integer *, real *, integer *), xerbla_( + char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer itrans; + logical notran; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = *(unsigned char *)trans == 'N' || *(unsigned char *)trans == 'n'; + if (! notran && ! (*(unsigned char *)trans == 'T' || *(unsigned char *) + trans == 't') && ! (*(unsigned char *)trans == 'C' || *(unsigned + char *)trans == 'c')) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*ldb < f2cmax(*n,1)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGTTRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + +/* Decode TRANS */ + + if (notran) { + itrans = 0; + } else { + itrans = 1; + } + +/* Determine the number of right-hand sides to solve at a time. */ + + if (*nrhs == 1) { + nb = 1; + } else { +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "SGTTRS", trans, n, nrhs, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nb = f2cmax(i__1,i__2); + } + + if (nb >= *nrhs) { + sgtts2_(&itrans, n, nrhs, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[1], + &b[b_offset], ldb); + } else { + i__1 = *nrhs; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = *nrhs - j + 1; + jb = f2cmin(i__3,nb); + sgtts2_(&itrans, n, &jb, &dl[1], &d__[1], &du[1], &du2[1], &ipiv[ + 1], &b[j * b_dim1 + 1], ldb); +/* L10: */ + } + } + +/* End of SGTTRS */ + + return 0; +} /* sgttrs_ */ + diff --git a/lapack-netlib/SRC/sgtts2.c b/lapack-netlib/SRC/sgtts2.c new file mode 100644 index 000000000..923eec1f8 --- /dev/null +++ b/lapack-netlib/SRC/sgtts2.c @@ -0,0 +1,706 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization + computed by sgttrf. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SGTTS2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) */ + +/* INTEGER ITRANS, LDB, N, NRHS */ +/* INTEGER IPIV( * ) */ +/* REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SGTTS2 solves one of the systems of equations */ +/* > A*X = B or A**T*X = B, */ +/* > with a tridiagonal matrix A using the LU factorization computed */ +/* > by SGTTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITRANS */ +/* > \verbatim */ +/* > ITRANS is INTEGER */ +/* > Specifies the form of the system of equations. */ +/* > = 0: A * X = B (No transpose) */ +/* > = 1: A**T* X = B (Transpose) */ +/* > = 2: A**T* X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DL */ +/* > \verbatim */ +/* > DL is REAL array, dimension (N-1) */ +/* > The (n-1) multipliers that define the matrix L from the */ +/* > LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is REAL array, dimension (N) */ +/* > The n diagonal elements of the upper triangular matrix U from */ +/* > the LU factorization of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU */ +/* > \verbatim */ +/* > DU is REAL array, dimension (N-1) */ +/* > The (n-1) elements of the first super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DU2 */ +/* > \verbatim */ +/* > DU2 is REAL array, dimension (N-2) */ +/* > The (n-2) elements of the second super-diagonal of U. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices; for 1 <= i <= n, row i of the matrix was */ +/* > interchanged with row IPIV(i). IPIV(i) will always be either */ +/* > i or i+1; IPIV(i) = i indicates a row interchange was not */ +/* > required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > On entry, the matrix of right hand side vectors B. */ +/* > On exit, B is overwritten by the solution vectors X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGTcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sgtts2_(integer *itrans, integer *n, integer *nrhs, real + *dl, real *d__, real *du, real *du2, integer *ipiv, real *b, integer * + ldb) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + real temp; + integer i__, j, ip; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + --dl; + --d__; + --du; + --du2; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + + /* Function Body */ + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (*itrans == 0) { + +/* Solve A*X = B using the LU factorization of A, */ +/* overwriting each right hand side vector with its solution. */ + + if (*nrhs <= 1) { + j = 1; +L10: + +/* Solve L*x = b. */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + ip = ipiv[i__]; + temp = b[i__ + 1 - ip + i__ + j * b_dim1] - dl[i__] * b[ip + + j * b_dim1]; + b[i__ + j * b_dim1] = b[ip + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp; +/* L20: */ + } + +/* Solve U*x = b. */ + + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n - 1] + * b[*n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[i__ + + 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j * b_dim1] + ) / d__[i__]; +/* L30: */ + } + if (j < *nrhs) { + ++j; + goto L10; + } + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve L*x = b. */ + + i__2 = *n - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (ipiv[i__] == i__) { + b[i__ + 1 + j * b_dim1] -= dl[i__] * b[i__ + j * + b_dim1]; + } else { + temp = b[i__ + j * b_dim1]; + b[i__ + j * b_dim1] = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = temp - dl[i__] * b[i__ + j * + b_dim1]; + } +/* L40: */ + } + +/* Solve U*x = b. */ + + b[*n + j * b_dim1] /= d__[*n]; + if (*n > 1) { + b[*n - 1 + j * b_dim1] = (b[*n - 1 + j * b_dim1] - du[*n + - 1] * b[*n + j * b_dim1]) / d__[*n - 1]; + } + for (i__ = *n - 2; i__ >= 1; --i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__] * b[ + i__ + 1 + j * b_dim1] - du2[i__] * b[i__ + 2 + j * + b_dim1]) / d__[i__]; +/* L50: */ + } +/* L60: */ + } + } + } else { + +/* Solve A**T * X = B. */ + + if (*nrhs <= 1) { + +/* Solve U**T*x = b. */ + + j = 1; +L70: + b[j * b_dim1 + 1] /= d__[1]; + if (*n > 1) { + b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * b_dim1 + + 1]) / d__[2]; + } + i__1 = *n; + for (i__ = 3; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] * b[ + i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - 2 + j * + b_dim1]) / d__[i__]; +/* L80: */ + } + +/* Solve L**T*x = b. */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + ip = ipiv[i__]; + temp = b[i__ + j * b_dim1] - dl[i__] * b[i__ + 1 + j * b_dim1] + ; + b[i__ + j * b_dim1] = b[ip + j * b_dim1]; + b[ip + j * b_dim1] = temp; +/* L90: */ + } + if (j < *nrhs) { + ++j; + goto L70; + } + + } else { + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + +/* Solve U**T*x = b. */ + + b[j * b_dim1 + 1] /= d__[1]; + if (*n > 1) { + b[j * b_dim1 + 2] = (b[j * b_dim1 + 2] - du[1] * b[j * + b_dim1 + 1]) / d__[2]; + } + i__2 = *n; + for (i__ = 3; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = (b[i__ + j * b_dim1] - du[i__ - 1] * + b[i__ - 1 + j * b_dim1] - du2[i__ - 2] * b[i__ - + 2 + j * b_dim1]) / d__[i__]; +/* L100: */ + } + for (i__ = *n - 1; i__ >= 1; --i__) { + if (ipiv[i__] == i__) { + b[i__ + j * b_dim1] -= dl[i__] * b[i__ + 1 + j * + b_dim1]; + } else { + temp = b[i__ + 1 + j * b_dim1]; + b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - dl[ + i__] * temp; + b[i__ + j * b_dim1] = temp; + } +/* L110: */ + } +/* L120: */ + } + } + } + +/* End of SGTTS2 */ + + return 0; +} /* sgtts2_ */ + diff --git a/lapack-netlib/SRC/shgeqz.c b/lapack-netlib/SRC/shgeqz.c new file mode 100644 index 000000000..7301c0e1a --- /dev/null +++ b/lapack-netlib/SRC/shgeqz.c @@ -0,0 +1,1976 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SHGEQZ */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SHGEQZ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, */ +/* ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, */ +/* LWORK, INFO ) */ + +/* CHARACTER COMPQ, COMPZ, JOB */ +/* INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N */ +/* REAL ALPHAI( * ), ALPHAR( * ), BETA( * ), */ +/* $ H( LDH, * ), Q( LDQ, * ), T( LDT, * ), */ +/* $ WORK( * ), Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SHGEQZ computes the eigenvalues of a real matrix pair (H,T), */ +/* > where H is an upper Hessenberg matrix and T is upper triangular, */ +/* > using the double-shift QZ method. */ +/* > Matrix pairs of this type are produced by the reduction to */ +/* > generalized upper Hessenberg form of a real matrix pair (A,B): */ +/* > */ +/* > A = Q1*H*Z1**T, B = Q1*T*Z1**T, */ +/* > */ +/* > as computed by SGGHRD. */ +/* > */ +/* > If JOB='S', then the Hessenberg-triangular pair (H,T) is */ +/* > also reduced to generalized Schur form, */ +/* > */ +/* > H = Q*S*Z**T, T = Q*P*Z**T, */ +/* > */ +/* > where Q and Z are orthogonal matrices, P is an upper triangular */ +/* > matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 */ +/* > diagonal blocks. */ +/* > */ +/* > The 1-by-1 blocks correspond to real eigenvalues of the matrix pair */ +/* > (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of */ +/* > eigenvalues. */ +/* > */ +/* > Additionally, the 2-by-2 upper triangular diagonal blocks of P */ +/* > corresponding to 2-by-2 blocks of S are reduced to positive diagonal */ +/* > form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, */ +/* > P(j,j) > 0, and P(j+1,j+1) > 0. */ +/* > */ +/* > Optionally, the orthogonal matrix Q from the generalized Schur */ +/* > factorization may be postmultiplied into an input matrix Q1, and the */ +/* > orthogonal matrix Z may be postmultiplied into an input matrix Z1. */ +/* > If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced */ +/* > the matrix pair (A,B) to generalized upper Hessenberg form, then the */ +/* > output matrices Q1*Q and Z1*Z are the orthogonal factors from the */ +/* > generalized Schur factorization of (A,B): */ +/* > */ +/* > A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. */ +/* > */ +/* > To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, */ +/* > of (A,B)) are computed as a pair of values (alpha,beta), where alpha is */ +/* > complex and beta real. */ +/* > If beta is nonzero, lambda = alpha / beta is an eigenvalue of the */ +/* > generalized nonsymmetric eigenvalue problem (GNEP) */ +/* > A*x = lambda*B*x */ +/* > and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the */ +/* > alternate form of the GNEP */ +/* > mu*A*y = B*y. */ +/* > Real eigenvalues can be read directly from the generalized Schur */ +/* > form: */ +/* > alpha = S(i,i), beta = P(i,i). */ +/* > */ +/* > Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix */ +/* > Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), */ +/* > pp. 241--256. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > = 'E': Compute eigenvalues only; */ +/* > = 'S': Compute eigenvalues and the Schur form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPQ */ +/* > \verbatim */ +/* > COMPQ is CHARACTER*1 */ +/* > = 'N': Left Schur vectors (Q) are not computed; */ +/* > = 'I': Q is initialized to the unit matrix and the matrix Q */ +/* > of left Schur vectors of (H,T) is returned; */ +/* > = 'V': Q must contain an orthogonal matrix Q1 on entry and */ +/* > the product Q1*Q is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': Right Schur vectors (Z) are not computed; */ +/* > = 'I': Z is initialized to the unit matrix and the matrix Z */ +/* > of right Schur vectors of (H,T) is returned; */ +/* > = 'V': Z must contain an orthogonal matrix Z1 on entry and */ +/* > the product Z1*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices H, T, Q, and Z. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > ILO and IHI mark the rows and columns of H which are in */ +/* > Hessenberg form. It is assumed that A is already upper */ +/* > triangular in rows and columns 1:ILO-1 and IHI+1:N. */ +/* > If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] H */ +/* > \verbatim */ +/* > H is REAL array, dimension (LDH, N) */ +/* > On entry, the N-by-N upper Hessenberg matrix H. */ +/* > On exit, if JOB = 'S', H contains the upper quasi-triangular */ +/* > matrix S from the generalized Schur factorization. */ +/* > If JOB = 'E', the diagonal blocks of H match those of S, but */ +/* > the rest of H is unspecified. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDH */ +/* > \verbatim */ +/* > LDH is INTEGER */ +/* > The leading dimension of the array H. LDH >= f2cmax( 1, N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] T */ +/* > \verbatim */ +/* > T is REAL array, dimension (LDT, N) */ +/* > On entry, the N-by-N upper triangular matrix T. */ +/* > On exit, if JOB = 'S', T contains the upper triangular */ +/* > matrix P from the generalized Schur factorization; */ +/* > 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks of S */ +/* > are reduced to positive diagonal form, i.e., if H(j+1,j) is */ +/* > non-zero, then T(j+1,j) = T(j,j+1) = 0, T(j,j) > 0, and */ +/* > T(j+1,j+1) > 0. */ +/* > If JOB = 'E', the diagonal blocks of T match those of P, but */ +/* > the rest of T is unspecified. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= f2cmax( 1, N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAR */ +/* > \verbatim */ +/* > ALPHAR is REAL array, dimension (N) */ +/* > The real parts of each scalar alpha defining an eigenvalue */ +/* > of GNEP. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ALPHAI */ +/* > \verbatim */ +/* > ALPHAI is REAL array, dimension (N) */ +/* > The imaginary parts of each scalar alpha defining an */ +/* > eigenvalue of GNEP. */ +/* > If ALPHAI(j) is zero, then the j-th eigenvalue is real; if */ +/* > positive, then the j-th and (j+1)-st eigenvalues are a */ +/* > complex conjugate pair, with ALPHAI(j+1) = -ALPHAI(j). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BETA */ +/* > \verbatim */ +/* > BETA is REAL array, dimension (N) */ +/* > The scalars beta that define the eigenvalues of GNEP. */ +/* > Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and */ +/* > beta = BETA(j) represent the j-th eigenvalue of the matrix */ +/* > pair (A,B), in one of the forms lambda = alpha/beta or */ +/* > mu = beta/alpha. Since either lambda or mu may overflow, */ +/* > they should not, in general, be computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is REAL array, dimension (LDQ, N) */ +/* > On entry, if COMPQ = 'V', the orthogonal matrix Q1 used in */ +/* > the reduction of (A,B) to generalized Hessenberg form. */ +/* > On exit, if COMPQ = 'I', the orthogonal matrix of left Schur */ +/* > vectors of (H,T), and if COMPQ = 'V', the orthogonal matrix */ +/* > of left Schur vectors of (A,B). */ +/* > Not referenced if COMPQ = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= 1. */ +/* > If COMPQ='V' or 'I', then LDQ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is REAL array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', the orthogonal matrix Z1 used in */ +/* > the reduction of (A,B) to generalized Hessenberg form. */ +/* > On exit, if COMPZ = 'I', the orthogonal matrix of */ +/* > right Schur vectors of (H,T), and if COMPZ = 'V', the */ +/* > orthogonal matrix of right Schur vectors of (A,B). */ +/* > Not referenced if COMPZ = 'N'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1. */ +/* > If COMPZ='V' or 'I', then LDZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N). */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > = 1,...,N: the QZ iteration did not converge. (H,T) is not */ +/* > in Schur form, but ALPHAR(i), ALPHAI(i), and */ +/* > BETA(i), i=INFO+1,...,N should be correct. */ +/* > = N+1,...,2*N: the shift calculation failed. (H,T) is not */ +/* > in Schur form, but ALPHAR(i), ALPHAI(i), and */ +/* > BETA(i), i=INFO-N+1,...,N should be correct. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2016 */ + +/* > \ingroup realGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Iteration counters: */ +/* > */ +/* > JITER -- counts iterations. */ +/* > IITER -- counts iterations run since ILAST was last */ +/* > changed. This is therefore reset only when a 1-by-1 or */ +/* > 2-by-2 block deflates off the bottom. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int shgeqz_(char *job, char *compq, char *compz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *t, integer + *ldt, real *alphar, real *alphai, real *beta, real *q, integer *ldq, + real *z__, integer *ldz, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, q_dim1, q_offset, t_dim1, t_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + real ad11l, ad12l, ad21l, ad22l, ad32l, wabs, atol, btol, temp; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), slag2_(real *, integer *, real *, + integer *, real *, real *, real *, real *, real *, real *); + real temp2, s1inv, c__; + integer j; + real s, v[3], scale; + extern logical lsame_(char *, char *); + integer iiter, ilast, jiter; + real anorm, bnorm; + integer maxit; + real tempi, tempr, s1, s2, t1, u1, u2; + logical ilazr2; + real a11, a12, a21, a22, b11, b22, c12, c21; + extern real slapy2_(real *, real *); + integer jc; + extern real slapy3_(real *, real *, real *); + real an, bn, cl; + extern /* Subroutine */ int slasv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *); + real cq, cr; + integer in; + real ascale, bscale, u12, w11; + integer jr; + real cz, sl, w12, w21, w22, wi, sr; + extern real slamch_(char *); + real vs, wr, safmin; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + real safmax; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real eshift; + logical ilschr; + real b1a, b2a; + integer icompq, ilastm; + extern real slanhs_(char *, integer *, real *, integer *, real *); + real a1i; + integer ischur; + real a2i, b1i; + logical ilazro; + integer icompz, ifirst, ifrstm; + real a1r; + integer istart; + logical ilpivt; + real a2r, b1r, b2i, b2r; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ), slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + logical lquery; + real wr2, ad11, ad12, ad21, ad22, c11i, c22i; + integer jch; + real c11r, c22r; + logical ilq; + real u12l, tau, sqi; + logical ilz; + real ulp, sqr, szi, szr; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2016 */ + + +/* ===================================================================== */ + +/* $ SAFETY = 1.0E+0 ) */ + +/* Decode JOB, COMPQ, COMPZ */ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1 * 1; + h__ -= h_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1 * 1; + t -= t_offset; + --alphar; + --alphai; + --beta; + q_dim1 = *ldq; + q_offset = 1 + q_dim1 * 1; + q -= q_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + + /* Function Body */ + if (lsame_(job, "E")) { + ilschr = FALSE_; + ischur = 1; + } else if (lsame_(job, "S")) { + ilschr = TRUE_; + ischur = 2; + } else { + ischur = 0; + } + + if (lsame_(compq, "N")) { + ilq = FALSE_; + icompq = 1; + } else if (lsame_(compq, "V")) { + ilq = TRUE_; + icompq = 2; + } else if (lsame_(compq, "I")) { + ilq = TRUE_; + icompq = 3; + } else { + icompq = 0; + } + + if (lsame_(compz, "N")) { + ilz = FALSE_; + icompz = 1; + } else if (lsame_(compz, "V")) { + ilz = TRUE_; + icompz = 2; + } else if (lsame_(compz, "I")) { + ilz = TRUE_; + icompz = 3; + } else { + icompz = 0; + } + +/* Check Argument Values */ + + *info = 0; + work[1] = (real) f2cmax(1,*n); + lquery = *lwork == -1; + if (ischur == 0) { + *info = -1; + } else if (icompq == 0) { + *info = -2; + } else if (icompz == 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1) { + *info = -5; + } else if (*ihi > *n || *ihi < *ilo - 1) { + *info = -6; + } else if (*ldh < *n) { + *info = -8; + } else if (*ldt < *n) { + *info = -10; + } else if (*ldq < 1 || ilq && *ldq < *n) { + *info = -15; + } else if (*ldz < 1 || ilz && *ldz < *n) { + *info = -17; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SHGEQZ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.f; + return 0; + } + +/* Initialize Q and Z */ + + if (icompq == 3) { + slaset_("Full", n, n, &c_b12, &c_b13, &q[q_offset], ldq); + } + if (icompz == 3) { + slaset_("Full", n, n, &c_b12, &c_b13, &z__[z_offset], ldz); + } + +/* Machine Constants */ + + in = *ihi + 1 - *ilo; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ulp = slamch_("E") * slamch_("B"); + anorm = slanhs_("F", &in, &h__[*ilo + *ilo * h_dim1], ldh, &work[1]); + bnorm = slanhs_("F", &in, &t[*ilo + *ilo * t_dim1], ldt, &work[1]); +/* Computing MAX */ + r__1 = safmin, r__2 = ulp * anorm; + atol = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = safmin, r__2 = ulp * bnorm; + btol = f2cmax(r__1,r__2); + ascale = 1.f / f2cmax(safmin,anorm); + bscale = 1.f / f2cmax(safmin,bnorm); + +/* Set Eigenvalues IHI+1:N */ + + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + if (t[j + j * t_dim1] < 0.f) { + if (ilschr) { + i__2 = j; + for (jr = 1; jr <= i__2; ++jr) { + h__[jr + j * h_dim1] = -h__[jr + j * h_dim1]; + t[jr + j * t_dim1] = -t[jr + j * t_dim1]; +/* L10: */ + } + } else { + h__[j + j * h_dim1] = -h__[j + j * h_dim1]; + t[j + j * t_dim1] = -t[j + j * t_dim1]; + } + if (ilz) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + z__[jr + j * z_dim1] = -z__[jr + j * z_dim1]; +/* L20: */ + } + } + } + alphar[j] = h__[j + j * h_dim1]; + alphai[j] = 0.f; + beta[j] = t[j + j * t_dim1]; +/* L30: */ + } + +/* If IHI < ILO, skip QZ steps */ + + if (*ihi < *ilo) { + goto L380; + } + +/* MAIN QZ ITERATION LOOP */ + +/* Initialize dynamic indices */ + +/* Eigenvalues ILAST+1:N have been found. */ +/* Column operations modify rows IFRSTM:whatever. */ +/* Row operations modify columns whatever:ILASTM. */ + +/* If only eigenvalues are being computed, then */ +/* IFRSTM is the row of the last splitting row above row ILAST; */ +/* this is always at least ILO. */ +/* IITER counts iterations since the last eigenvalue was found, */ +/* to tell when to use an extraordinary shift. */ +/* MAXIT is the maximum number of QZ sweeps allowed. */ + + ilast = *ihi; + if (ilschr) { + ifrstm = 1; + ilastm = *n; + } else { + ifrstm = *ilo; + ilastm = *ihi; + } + iiter = 0; + eshift = 0.f; + maxit = (*ihi - *ilo + 1) * 30; + + i__1 = maxit; + for (jiter = 1; jiter <= i__1; ++jiter) { + +/* Split the matrix if possible. */ + +/* Two tests: */ +/* 1: H(j,j-1)=0 or j=ILO */ +/* 2: T(j,j)=0 */ + + if (ilast == *ilo) { + +/* Special case: j=ILAST */ + + goto L80; + } else { + if ((r__1 = h__[ilast + (ilast - 1) * h_dim1], abs(r__1)) <= atol) + { + h__[ilast + (ilast - 1) * h_dim1] = 0.f; + goto L80; + } + } + + if ((r__1 = t[ilast + ilast * t_dim1], abs(r__1)) <= btol) { + t[ilast + ilast * t_dim1] = 0.f; + goto L70; + } + +/* General case: j= i__2; --j) { + +/* Test 1: for H(j,j-1)=0 or j=ILO */ + + if (j == *ilo) { + ilazro = TRUE_; + } else { + if ((r__1 = h__[j + (j - 1) * h_dim1], abs(r__1)) <= atol) { + h__[j + (j - 1) * h_dim1] = 0.f; + ilazro = TRUE_; + } else { + ilazro = FALSE_; + } + } + +/* Test 2: for T(j,j)=0 */ + + if ((r__1 = t[j + j * t_dim1], abs(r__1)) < btol) { + t[j + j * t_dim1] = 0.f; + +/* Test 1a: Check for 2 consecutive small subdiagonals in A */ + + ilazr2 = FALSE_; + if (! ilazro) { + temp = (r__1 = h__[j + (j - 1) * h_dim1], abs(r__1)); + temp2 = (r__1 = h__[j + j * h_dim1], abs(r__1)); + tempr = f2cmax(temp,temp2); + if (tempr < 1.f && tempr != 0.f) { + temp /= tempr; + temp2 /= tempr; + } + if (temp * (ascale * (r__1 = h__[j + 1 + j * h_dim1], abs( + r__1))) <= temp2 * (ascale * atol)) { + ilazr2 = TRUE_; + } + } + +/* If both tests pass (1 & 2), i.e., the leading diagonal */ +/* element of B in the block is zero, split a 1x1 block off */ +/* at the top. (I.e., at the J-th row/column) The leading */ +/* diagonal element of the remainder can also be zero, so */ +/* this may have to be done repeatedly. */ + + if (ilazro || ilazr2) { + i__3 = ilast - 1; + for (jch = j; jch <= i__3; ++jch) { + temp = h__[jch + jch * h_dim1]; + slartg_(&temp, &h__[jch + 1 + jch * h_dim1], &c__, &s, + &h__[jch + jch * h_dim1]); + h__[jch + 1 + jch * h_dim1] = 0.f; + i__4 = ilastm - jch; + srot_(&i__4, &h__[jch + (jch + 1) * h_dim1], ldh, & + h__[jch + 1 + (jch + 1) * h_dim1], ldh, &c__, + &s); + i__4 = ilastm - jch; + srot_(&i__4, &t[jch + (jch + 1) * t_dim1], ldt, &t[ + jch + 1 + (jch + 1) * t_dim1], ldt, &c__, &s); + if (ilq) { + srot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1) + * q_dim1 + 1], &c__1, &c__, &s); + } + if (ilazr2) { + h__[jch + (jch - 1) * h_dim1] *= c__; + } + ilazr2 = FALSE_; + if ((r__1 = t[jch + 1 + (jch + 1) * t_dim1], abs(r__1) + ) >= btol) { + if (jch + 1 >= ilast) { + goto L80; + } else { + ifirst = jch + 1; + goto L110; + } + } + t[jch + 1 + (jch + 1) * t_dim1] = 0.f; +/* L40: */ + } + goto L70; + } else { + +/* Only test 2 passed -- chase the zero to T(ILAST,ILAST) */ +/* Then process as in the case T(ILAST,ILAST)=0 */ + + i__3 = ilast - 1; + for (jch = j; jch <= i__3; ++jch) { + temp = t[jch + (jch + 1) * t_dim1]; + slartg_(&temp, &t[jch + 1 + (jch + 1) * t_dim1], &c__, + &s, &t[jch + (jch + 1) * t_dim1]); + t[jch + 1 + (jch + 1) * t_dim1] = 0.f; + if (jch < ilastm - 1) { + i__4 = ilastm - jch - 1; + srot_(&i__4, &t[jch + (jch + 2) * t_dim1], ldt, & + t[jch + 1 + (jch + 2) * t_dim1], ldt, & + c__, &s); + } + i__4 = ilastm - jch + 2; + srot_(&i__4, &h__[jch + (jch - 1) * h_dim1], ldh, & + h__[jch + 1 + (jch - 1) * h_dim1], ldh, &c__, + &s); + if (ilq) { + srot_(n, &q[jch * q_dim1 + 1], &c__1, &q[(jch + 1) + * q_dim1 + 1], &c__1, &c__, &s); + } + temp = h__[jch + 1 + jch * h_dim1]; + slartg_(&temp, &h__[jch + 1 + (jch - 1) * h_dim1], & + c__, &s, &h__[jch + 1 + jch * h_dim1]); + h__[jch + 1 + (jch - 1) * h_dim1] = 0.f; + i__4 = jch + 1 - ifrstm; + srot_(&i__4, &h__[ifrstm + jch * h_dim1], &c__1, &h__[ + ifrstm + (jch - 1) * h_dim1], &c__1, &c__, &s) + ; + i__4 = jch - ifrstm; + srot_(&i__4, &t[ifrstm + jch * t_dim1], &c__1, &t[ + ifrstm + (jch - 1) * t_dim1], &c__1, &c__, &s) + ; + if (ilz) { + srot_(n, &z__[jch * z_dim1 + 1], &c__1, &z__[(jch + - 1) * z_dim1 + 1], &c__1, &c__, &s); + } +/* L50: */ + } + goto L70; + } + } else if (ilazro) { + +/* Only test 1 passed -- work on J:ILAST */ + + ifirst = j; + goto L110; + } + +/* Neither test passed -- try next J */ + +/* L60: */ + } + +/* (Drop-through is "impossible") */ + + *info = *n + 1; + goto L420; + +/* T(ILAST,ILAST)=0 -- clear H(ILAST,ILAST-1) to split off a */ +/* 1x1 block. */ + +L70: + temp = h__[ilast + ilast * h_dim1]; + slartg_(&temp, &h__[ilast + (ilast - 1) * h_dim1], &c__, &s, &h__[ + ilast + ilast * h_dim1]); + h__[ilast + (ilast - 1) * h_dim1] = 0.f; + i__2 = ilast - ifrstm; + srot_(&i__2, &h__[ifrstm + ilast * h_dim1], &c__1, &h__[ifrstm + ( + ilast - 1) * h_dim1], &c__1, &c__, &s); + i__2 = ilast - ifrstm; + srot_(&i__2, &t[ifrstm + ilast * t_dim1], &c__1, &t[ifrstm + (ilast - + 1) * t_dim1], &c__1, &c__, &s); + if (ilz) { + srot_(n, &z__[ilast * z_dim1 + 1], &c__1, &z__[(ilast - 1) * + z_dim1 + 1], &c__1, &c__, &s); + } + +/* H(ILAST,ILAST-1)=0 -- Standardize B, set ALPHAR, ALPHAI, */ +/* and BETA */ + +L80: + if (t[ilast + ilast * t_dim1] < 0.f) { + if (ilschr) { + i__2 = ilast; + for (j = ifrstm; j <= i__2; ++j) { + h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1]; + t[j + ilast * t_dim1] = -t[j + ilast * t_dim1]; +/* L90: */ + } + } else { + h__[ilast + ilast * h_dim1] = -h__[ilast + ilast * h_dim1]; + t[ilast + ilast * t_dim1] = -t[ilast + ilast * t_dim1]; + } + if (ilz) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1]; +/* L100: */ + } + } + } + alphar[ilast] = h__[ilast + ilast * h_dim1]; + alphai[ilast] = 0.f; + beta[ilast] = t[ilast + ilast * t_dim1]; + +/* Go to next block -- exit if finished. */ + + --ilast; + if (ilast < *ilo) { + goto L380; + } + +/* Reset counters */ + + iiter = 0; + eshift = 0.f; + if (! ilschr) { + ilastm = ilast; + if (ifrstm > ilast) { + ifrstm = *ilo; + } + } + goto L350; + +/* QZ step */ + +/* This iteration only involves rows/columns IFIRST:ILAST. We */ +/* assume IFIRST < ILAST, and that the diagonal of B is non-zero. */ + +L110: + ++iiter; + if (! ilschr) { + ifrstm = ifirst; + } + +/* Compute single shifts. */ + +/* At this point, IFIRST < ILAST, and the diagonal elements of */ +/* T(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in */ +/* magnitude) */ + + if (iiter / 10 * 10 == iiter) { + +/* Exceptional shift. Chosen for no particularly good reason. */ +/* (Single shift only.) */ + + if ((real) maxit * safmin * (r__1 = h__[ilast + (ilast - 1) * + h_dim1], abs(r__1)) < (r__2 = t[ilast - 1 + (ilast - 1) * + t_dim1], abs(r__2))) { + eshift = h__[ilast + (ilast - 1) * h_dim1] / t[ilast - 1 + ( + ilast - 1) * t_dim1]; + } else { + eshift += 1.f / (safmin * (real) maxit); + } + s1 = 1.f; + wr = eshift; + + } else { + +/* Shifts based on the generalized eigenvalues of the */ +/* bottom-right 2x2 block of A and B. The first eigenvalue */ +/* returned by SLAG2 is the Wilkinson shift (AEP p.512), */ + + r__1 = safmin * 100.f; + slag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 + + (ilast - 1) * t_dim1], ldt, &r__1, &s1, &s2, &wr, &wr2, + &wi); + + if ((r__1 = wr / s1 * t[ilast + ilast * t_dim1] - h__[ilast + + ilast * h_dim1], abs(r__1)) > (r__2 = wr2 / s2 * t[ilast + + ilast * t_dim1] - h__[ilast + ilast * h_dim1], abs(r__2) + )) { + temp = wr; + wr = wr2; + wr2 = temp; + temp = s1; + s1 = s2; + s2 = temp; + } +/* Computing MAX */ +/* Computing MAX */ + r__3 = 1.f, r__4 = abs(wr), r__3 = f2cmax(r__3,r__4), r__4 = abs(wi); + r__1 = s1, r__2 = safmin * f2cmax(r__3,r__4); + temp = f2cmax(r__1,r__2); + if (wi != 0.f) { + goto L200; + } + } + +/* Fiddle with shift to avoid overflow */ + + temp = f2cmin(ascale,1.f) * (safmax * .5f); + if (s1 > temp) { + scale = temp / s1; + } else { + scale = 1.f; + } + + temp = f2cmin(bscale,1.f) * (safmax * .5f); + if (abs(wr) > temp) { +/* Computing MIN */ + r__1 = scale, r__2 = temp / abs(wr); + scale = f2cmin(r__1,r__2); + } + s1 = scale * s1; + wr = scale * wr; + +/* Now check for two consecutive small subdiagonals. */ + + i__2 = ifirst + 1; + for (j = ilast - 1; j >= i__2; --j) { + istart = j; + temp = (r__1 = s1 * h__[j + (j - 1) * h_dim1], abs(r__1)); + temp2 = (r__1 = s1 * h__[j + j * h_dim1] - wr * t[j + j * t_dim1], + abs(r__1)); + tempr = f2cmax(temp,temp2); + if (tempr < 1.f && tempr != 0.f) { + temp /= tempr; + temp2 /= tempr; + } + if ((r__1 = ascale * h__[j + 1 + j * h_dim1] * temp, abs(r__1)) <= + ascale * atol * temp2) { + goto L130; + } +/* L120: */ + } + + istart = ifirst; +L130: + +/* Do an implicit single-shift QZ sweep. */ + +/* Initial Q */ + + temp = s1 * h__[istart + istart * h_dim1] - wr * t[istart + istart * + t_dim1]; + temp2 = s1 * h__[istart + 1 + istart * h_dim1]; + slartg_(&temp, &temp2, &c__, &s, &tempr); + +/* Sweep */ + + i__2 = ilast - 1; + for (j = istart; j <= i__2; ++j) { + if (j > istart) { + temp = h__[j + (j - 1) * h_dim1]; + slartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[ + j + (j - 1) * h_dim1]); + h__[j + 1 + (j - 1) * h_dim1] = 0.f; + } + + i__3 = ilastm; + for (jc = j; jc <= i__3; ++jc) { + temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * + h_dim1]; + h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * + h__[j + 1 + jc * h_dim1]; + h__[j + jc * h_dim1] = temp; + temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1]; + t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j + + 1 + jc * t_dim1]; + t[j + jc * t_dim1] = temp2; +/* L140: */ + } + if (ilq) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * + q_dim1]; + q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ * + q[jr + (j + 1) * q_dim1]; + q[jr + j * q_dim1] = temp; +/* L150: */ + } + } + + temp = t[j + 1 + (j + 1) * t_dim1]; + slartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + + 1) * t_dim1]); + t[j + 1 + j * t_dim1] = 0.f; + +/* Computing MIN */ + i__4 = j + 2; + i__3 = f2cmin(i__4,ilast); + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * + h_dim1]; + h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ * + h__[jr + j * h_dim1]; + h__[jr + (j + 1) * h_dim1] = temp; +/* L160: */ + } + i__3 = j; + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1] + ; + t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[ + jr + j * t_dim1]; + t[jr + (j + 1) * t_dim1] = temp; +/* L170: */ + } + if (ilz) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j * + z_dim1]; + z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + + c__ * z__[jr + j * z_dim1]; + z__[jr + (j + 1) * z_dim1] = temp; +/* L180: */ + } + } +/* L190: */ + } + + goto L350; + +/* Use Francis double-shift */ + +/* Note: the Francis double-shift should work with real shifts, */ +/* but only if the block is at least 3x3. */ +/* This code may break if this point is reached with */ +/* a 2x2 block with real eigenvalues. */ + +L200: + if (ifirst + 1 == ilast) { + +/* Special case -- 2x2 block with complex eigenvectors */ + +/* Step 1: Standardize, that is, rotate so that */ + +/* ( B11 0 ) */ +/* B = ( ) with B11 non-negative. */ +/* ( 0 B22 ) */ + + slasv2_(&t[ilast - 1 + (ilast - 1) * t_dim1], &t[ilast - 1 + + ilast * t_dim1], &t[ilast + ilast * t_dim1], &b22, &b11, & + sr, &cr, &sl, &cl); + + if (b11 < 0.f) { + cr = -cr; + sr = -sr; + b11 = -b11; + b22 = -b22; + } + + i__2 = ilastm + 1 - ifirst; + srot_(&i__2, &h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &h__[ + ilast + (ilast - 1) * h_dim1], ldh, &cl, &sl); + i__2 = ilast + 1 - ifrstm; + srot_(&i__2, &h__[ifrstm + (ilast - 1) * h_dim1], &c__1, &h__[ + ifrstm + ilast * h_dim1], &c__1, &cr, &sr); + + if (ilast < ilastm) { + i__2 = ilastm - ilast; + srot_(&i__2, &t[ilast - 1 + (ilast + 1) * t_dim1], ldt, &t[ + ilast + (ilast + 1) * t_dim1], ldt, &cl, &sl); + } + if (ifrstm < ilast - 1) { + i__2 = ifirst - ifrstm; + srot_(&i__2, &t[ifrstm + (ilast - 1) * t_dim1], &c__1, &t[ + ifrstm + ilast * t_dim1], &c__1, &cr, &sr); + } + + if (ilq) { + srot_(n, &q[(ilast - 1) * q_dim1 + 1], &c__1, &q[ilast * + q_dim1 + 1], &c__1, &cl, &sl); + } + if (ilz) { + srot_(n, &z__[(ilast - 1) * z_dim1 + 1], &c__1, &z__[ilast * + z_dim1 + 1], &c__1, &cr, &sr); + } + + t[ilast - 1 + (ilast - 1) * t_dim1] = b11; + t[ilast - 1 + ilast * t_dim1] = 0.f; + t[ilast + (ilast - 1) * t_dim1] = 0.f; + t[ilast + ilast * t_dim1] = b22; + +/* If B22 is negative, negate column ILAST */ + + if (b22 < 0.f) { + i__2 = ilast; + for (j = ifrstm; j <= i__2; ++j) { + h__[j + ilast * h_dim1] = -h__[j + ilast * h_dim1]; + t[j + ilast * t_dim1] = -t[j + ilast * t_dim1]; +/* L210: */ + } + + if (ilz) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + z__[j + ilast * z_dim1] = -z__[j + ilast * z_dim1]; +/* L220: */ + } + } + b22 = -b22; + } + +/* Step 2: Compute ALPHAR, ALPHAI, and BETA (see refs.) */ + +/* Recompute shift */ + + r__1 = safmin * 100.f; + slag2_(&h__[ilast - 1 + (ilast - 1) * h_dim1], ldh, &t[ilast - 1 + + (ilast - 1) * t_dim1], ldt, &r__1, &s1, &temp, &wr, & + temp2, &wi); + +/* If standardization has perturbed the shift onto real line, */ +/* do another (real single-shift) QR step. */ + + if (wi == 0.f) { + goto L350; + } + s1inv = 1.f / s1; + +/* Do EISPACK (QZVAL) computation of alpha and beta */ + + a11 = h__[ilast - 1 + (ilast - 1) * h_dim1]; + a21 = h__[ilast + (ilast - 1) * h_dim1]; + a12 = h__[ilast - 1 + ilast * h_dim1]; + a22 = h__[ilast + ilast * h_dim1]; + +/* Compute complex Givens rotation on right */ +/* (Assume some element of C = (sA - wB) > unfl ) */ +/* __ */ +/* (sA - wB) ( CZ -SZ ) */ +/* ( SZ CZ ) */ + + c11r = s1 * a11 - wr * b11; + c11i = -wi * b11; + c12 = s1 * a12; + c21 = s1 * a21; + c22r = s1 * a22 - wr * b22; + c22i = -wi * b22; + + if (abs(c11r) + abs(c11i) + abs(c12) > abs(c21) + abs(c22r) + abs( + c22i)) { + t1 = slapy3_(&c12, &c11r, &c11i); + cz = c12 / t1; + szr = -c11r / t1; + szi = -c11i / t1; + } else { + cz = slapy2_(&c22r, &c22i); + if (cz <= safmin) { + cz = 0.f; + szr = 1.f; + szi = 0.f; + } else { + tempr = c22r / cz; + tempi = c22i / cz; + t1 = slapy2_(&cz, &c21); + cz /= t1; + szr = -c21 * tempr / t1; + szi = c21 * tempi / t1; + } + } + +/* Compute Givens rotation on left */ + +/* ( CQ SQ ) */ +/* ( __ ) A or B */ +/* ( -SQ CQ ) */ + + an = abs(a11) + abs(a12) + abs(a21) + abs(a22); + bn = abs(b11) + abs(b22); + wabs = abs(wr) + abs(wi); + if (s1 * an > wabs * bn) { + cq = cz * b11; + sqr = szr * b22; + sqi = -szi * b22; + } else { + a1r = cz * a11 + szr * a12; + a1i = szi * a12; + a2r = cz * a21 + szr * a22; + a2i = szi * a22; + cq = slapy2_(&a1r, &a1i); + if (cq <= safmin) { + cq = 0.f; + sqr = 1.f; + sqi = 0.f; + } else { + tempr = a1r / cq; + tempi = a1i / cq; + sqr = tempr * a2r + tempi * a2i; + sqi = tempi * a2r - tempr * a2i; + } + } + t1 = slapy3_(&cq, &sqr, &sqi); + cq /= t1; + sqr /= t1; + sqi /= t1; + +/* Compute diagonal elements of QBZ */ + + tempr = sqr * szr - sqi * szi; + tempi = sqr * szi + sqi * szr; + b1r = cq * cz * b11 + tempr * b22; + b1i = tempi * b22; + b1a = slapy2_(&b1r, &b1i); + b2r = cq * cz * b22 + tempr * b11; + b2i = -tempi * b11; + b2a = slapy2_(&b2r, &b2i); + +/* Normalize so beta > 0, and Im( alpha1 ) > 0 */ + + beta[ilast - 1] = b1a; + beta[ilast] = b2a; + alphar[ilast - 1] = wr * b1a * s1inv; + alphai[ilast - 1] = wi * b1a * s1inv; + alphar[ilast] = wr * b2a * s1inv; + alphai[ilast] = -(wi * b2a) * s1inv; + +/* Step 3: Go to next block -- exit if finished. */ + + ilast = ifirst - 1; + if (ilast < *ilo) { + goto L380; + } + +/* Reset counters */ + + iiter = 0; + eshift = 0.f; + if (! ilschr) { + ilastm = ilast; + if (ifrstm > ilast) { + ifrstm = *ilo; + } + } + goto L350; + } else { + +/* Usual case: 3x3 or larger block, using Francis implicit */ +/* double-shift */ + +/* 2 */ +/* Eigenvalue equation is w - c w + d = 0, */ + +/* -1 2 -1 */ +/* so compute 1st column of (A B ) - c A B + d */ +/* using the formula in QZIT (from EISPACK) */ + +/* We assume that the block is at least 3x3 */ + + ad11 = ascale * h__[ilast - 1 + (ilast - 1) * h_dim1] / (bscale * + t[ilast - 1 + (ilast - 1) * t_dim1]); + ad21 = ascale * h__[ilast + (ilast - 1) * h_dim1] / (bscale * t[ + ilast - 1 + (ilast - 1) * t_dim1]); + ad12 = ascale * h__[ilast - 1 + ilast * h_dim1] / (bscale * t[ + ilast + ilast * t_dim1]); + ad22 = ascale * h__[ilast + ilast * h_dim1] / (bscale * t[ilast + + ilast * t_dim1]); + u12 = t[ilast - 1 + ilast * t_dim1] / t[ilast + ilast * t_dim1]; + ad11l = ascale * h__[ifirst + ifirst * h_dim1] / (bscale * t[ + ifirst + ifirst * t_dim1]); + ad21l = ascale * h__[ifirst + 1 + ifirst * h_dim1] / (bscale * t[ + ifirst + ifirst * t_dim1]); + ad12l = ascale * h__[ifirst + (ifirst + 1) * h_dim1] / (bscale * + t[ifirst + 1 + (ifirst + 1) * t_dim1]); + ad22l = ascale * h__[ifirst + 1 + (ifirst + 1) * h_dim1] / ( + bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]); + ad32l = ascale * h__[ifirst + 2 + (ifirst + 1) * h_dim1] / ( + bscale * t[ifirst + 1 + (ifirst + 1) * t_dim1]); + u12l = t[ifirst + (ifirst + 1) * t_dim1] / t[ifirst + 1 + (ifirst + + 1) * t_dim1]; + + v[0] = (ad11 - ad11l) * (ad22 - ad11l) - ad12 * ad21 + ad21 * u12 + * ad11l + (ad12l - ad11l * u12l) * ad21l; + v[1] = (ad22l - ad11l - ad21l * u12l - (ad11 - ad11l) - (ad22 - + ad11l) + ad21 * u12) * ad21l; + v[2] = ad32l * ad21l; + + istart = ifirst; + + slarfg_(&c__3, v, &v[1], &c__1, &tau); + v[0] = 1.f; + +/* Sweep */ + + i__2 = ilast - 2; + for (j = istart; j <= i__2; ++j) { + +/* All but last elements: use 3x3 Householder transforms. */ + +/* Zero (j-1)st column of A */ + + if (j > istart) { + v[0] = h__[j + (j - 1) * h_dim1]; + v[1] = h__[j + 1 + (j - 1) * h_dim1]; + v[2] = h__[j + 2 + (j - 1) * h_dim1]; + + slarfg_(&c__3, &h__[j + (j - 1) * h_dim1], &v[1], &c__1, & + tau); + v[0] = 1.f; + h__[j + 1 + (j - 1) * h_dim1] = 0.f; + h__[j + 2 + (j - 1) * h_dim1] = 0.f; + } + + i__3 = ilastm; + for (jc = j; jc <= i__3; ++jc) { + temp = tau * (h__[j + jc * h_dim1] + v[1] * h__[j + 1 + + jc * h_dim1] + v[2] * h__[j + 2 + jc * h_dim1]); + h__[j + jc * h_dim1] -= temp; + h__[j + 1 + jc * h_dim1] -= temp * v[1]; + h__[j + 2 + jc * h_dim1] -= temp * v[2]; + temp2 = tau * (t[j + jc * t_dim1] + v[1] * t[j + 1 + jc * + t_dim1] + v[2] * t[j + 2 + jc * t_dim1]); + t[j + jc * t_dim1] -= temp2; + t[j + 1 + jc * t_dim1] -= temp2 * v[1]; + t[j + 2 + jc * t_dim1] -= temp2 * v[2]; +/* L230: */ + } + if (ilq) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = tau * (q[jr + j * q_dim1] + v[1] * q[jr + (j + + 1) * q_dim1] + v[2] * q[jr + (j + 2) * q_dim1] + ); + q[jr + j * q_dim1] -= temp; + q[jr + (j + 1) * q_dim1] -= temp * v[1]; + q[jr + (j + 2) * q_dim1] -= temp * v[2]; +/* L240: */ + } + } + +/* Zero j-th column of B (see SLAGBC for details) */ + +/* Swap rows to pivot */ + + ilpivt = FALSE_; +/* Computing MAX */ + r__3 = (r__1 = t[j + 1 + (j + 1) * t_dim1], abs(r__1)), r__4 = + (r__2 = t[j + 1 + (j + 2) * t_dim1], abs(r__2)); + temp = f2cmax(r__3,r__4); +/* Computing MAX */ + r__3 = (r__1 = t[j + 2 + (j + 1) * t_dim1], abs(r__1)), r__4 = + (r__2 = t[j + 2 + (j + 2) * t_dim1], abs(r__2)); + temp2 = f2cmax(r__3,r__4); + if (f2cmax(temp,temp2) < safmin) { + scale = 0.f; + u1 = 1.f; + u2 = 0.f; + goto L250; + } else if (temp >= temp2) { + w11 = t[j + 1 + (j + 1) * t_dim1]; + w21 = t[j + 2 + (j + 1) * t_dim1]; + w12 = t[j + 1 + (j + 2) * t_dim1]; + w22 = t[j + 2 + (j + 2) * t_dim1]; + u1 = t[j + 1 + j * t_dim1]; + u2 = t[j + 2 + j * t_dim1]; + } else { + w21 = t[j + 1 + (j + 1) * t_dim1]; + w11 = t[j + 2 + (j + 1) * t_dim1]; + w22 = t[j + 1 + (j + 2) * t_dim1]; + w12 = t[j + 2 + (j + 2) * t_dim1]; + u2 = t[j + 1 + j * t_dim1]; + u1 = t[j + 2 + j * t_dim1]; + } + +/* Swap columns if nec. */ + + if (abs(w12) > abs(w11)) { + ilpivt = TRUE_; + temp = w12; + temp2 = w22; + w12 = w11; + w22 = w21; + w11 = temp; + w21 = temp2; + } + +/* LU-factor */ + + temp = w21 / w11; + u2 -= temp * u1; + w22 -= temp * w12; + w21 = 0.f; + +/* Compute SCALE */ + + scale = 1.f; + if (abs(w22) < safmin) { + scale = 0.f; + u2 = 1.f; + u1 = -w12 / w11; + goto L250; + } + if (abs(w22) < abs(u2)) { + scale = (r__1 = w22 / u2, abs(r__1)); + } + if (abs(w11) < abs(u1)) { +/* Computing MIN */ + r__2 = scale, r__3 = (r__1 = w11 / u1, abs(r__1)); + scale = f2cmin(r__2,r__3); + } + +/* Solve */ + + u2 = scale * u2 / w22; + u1 = (scale * u1 - w12 * u2) / w11; + +L250: + if (ilpivt) { + temp = u2; + u2 = u1; + u1 = temp; + } + +/* Compute Householder Vector */ + +/* Computing 2nd power */ + r__1 = scale; +/* Computing 2nd power */ + r__2 = u1; +/* Computing 2nd power */ + r__3 = u2; + t1 = sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3); + tau = scale / t1 + 1.f; + vs = -1.f / (scale + t1); + v[0] = 1.f; + v[1] = vs * u1; + v[2] = vs * u2; + +/* Apply transformations from the right. */ + +/* Computing MIN */ + i__4 = j + 3; + i__3 = f2cmin(i__4,ilast); + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = tau * (h__[jr + j * h_dim1] + v[1] * h__[jr + (j + + 1) * h_dim1] + v[2] * h__[jr + (j + 2) * h_dim1]); + h__[jr + j * h_dim1] -= temp; + h__[jr + (j + 1) * h_dim1] -= temp * v[1]; + h__[jr + (j + 2) * h_dim1] -= temp * v[2]; +/* L260: */ + } + i__3 = j + 2; + for (jr = ifrstm; jr <= i__3; ++jr) { + temp = tau * (t[jr + j * t_dim1] + v[1] * t[jr + (j + 1) * + t_dim1] + v[2] * t[jr + (j + 2) * t_dim1]); + t[jr + j * t_dim1] -= temp; + t[jr + (j + 1) * t_dim1] -= temp * v[1]; + t[jr + (j + 2) * t_dim1] -= temp * v[2]; +/* L270: */ + } + if (ilz) { + i__3 = *n; + for (jr = 1; jr <= i__3; ++jr) { + temp = tau * (z__[jr + j * z_dim1] + v[1] * z__[jr + ( + j + 1) * z_dim1] + v[2] * z__[jr + (j + 2) * + z_dim1]); + z__[jr + j * z_dim1] -= temp; + z__[jr + (j + 1) * z_dim1] -= temp * v[1]; + z__[jr + (j + 2) * z_dim1] -= temp * v[2]; +/* L280: */ + } + } + t[j + 1 + j * t_dim1] = 0.f; + t[j + 2 + j * t_dim1] = 0.f; +/* L290: */ + } + +/* Last elements: Use Givens rotations */ + +/* Rotations from the left */ + + j = ilast - 1; + temp = h__[j + (j - 1) * h_dim1]; + slartg_(&temp, &h__[j + 1 + (j - 1) * h_dim1], &c__, &s, &h__[j + + (j - 1) * h_dim1]); + h__[j + 1 + (j - 1) * h_dim1] = 0.f; + + i__2 = ilastm; + for (jc = j; jc <= i__2; ++jc) { + temp = c__ * h__[j + jc * h_dim1] + s * h__[j + 1 + jc * + h_dim1]; + h__[j + 1 + jc * h_dim1] = -s * h__[j + jc * h_dim1] + c__ * + h__[j + 1 + jc * h_dim1]; + h__[j + jc * h_dim1] = temp; + temp2 = c__ * t[j + jc * t_dim1] + s * t[j + 1 + jc * t_dim1]; + t[j + 1 + jc * t_dim1] = -s * t[j + jc * t_dim1] + c__ * t[j + + 1 + jc * t_dim1]; + t[j + jc * t_dim1] = temp2; +/* L300: */ + } + if (ilq) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + temp = c__ * q[jr + j * q_dim1] + s * q[jr + (j + 1) * + q_dim1]; + q[jr + (j + 1) * q_dim1] = -s * q[jr + j * q_dim1] + c__ * + q[jr + (j + 1) * q_dim1]; + q[jr + j * q_dim1] = temp; +/* L310: */ + } + } + +/* Rotations from the right. */ + + temp = t[j + 1 + (j + 1) * t_dim1]; + slartg_(&temp, &t[j + 1 + j * t_dim1], &c__, &s, &t[j + 1 + (j + + 1) * t_dim1]); + t[j + 1 + j * t_dim1] = 0.f; + + i__2 = ilast; + for (jr = ifrstm; jr <= i__2; ++jr) { + temp = c__ * h__[jr + (j + 1) * h_dim1] + s * h__[jr + j * + h_dim1]; + h__[jr + j * h_dim1] = -s * h__[jr + (j + 1) * h_dim1] + c__ * + h__[jr + j * h_dim1]; + h__[jr + (j + 1) * h_dim1] = temp; +/* L320: */ + } + i__2 = ilast - 1; + for (jr = ifrstm; jr <= i__2; ++jr) { + temp = c__ * t[jr + (j + 1) * t_dim1] + s * t[jr + j * t_dim1] + ; + t[jr + j * t_dim1] = -s * t[jr + (j + 1) * t_dim1] + c__ * t[ + jr + j * t_dim1]; + t[jr + (j + 1) * t_dim1] = temp; +/* L330: */ + } + if (ilz) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + temp = c__ * z__[jr + (j + 1) * z_dim1] + s * z__[jr + j * + z_dim1]; + z__[jr + j * z_dim1] = -s * z__[jr + (j + 1) * z_dim1] + + c__ * z__[jr + j * z_dim1]; + z__[jr + (j + 1) * z_dim1] = temp; +/* L340: */ + } + } + +/* End of Double-Shift code */ + + } + + goto L350; + +/* End of iteration loop */ + +L350: +/* L360: */ + ; + } + +/* Drop-through = non-convergence */ + + *info = ilast; + goto L420; + +/* Successful completion of all QZ steps */ + +L380: + +/* Set Eigenvalues 1:ILO-1 */ + + i__1 = *ilo - 1; + for (j = 1; j <= i__1; ++j) { + if (t[j + j * t_dim1] < 0.f) { + if (ilschr) { + i__2 = j; + for (jr = 1; jr <= i__2; ++jr) { + h__[jr + j * h_dim1] = -h__[jr + j * h_dim1]; + t[jr + j * t_dim1] = -t[jr + j * t_dim1]; +/* L390: */ + } + } else { + h__[j + j * h_dim1] = -h__[j + j * h_dim1]; + t[j + j * t_dim1] = -t[j + j * t_dim1]; + } + if (ilz) { + i__2 = *n; + for (jr = 1; jr <= i__2; ++jr) { + z__[jr + j * z_dim1] = -z__[jr + j * z_dim1]; +/* L400: */ + } + } + } + alphar[j] = h__[j + j * h_dim1]; + alphai[j] = 0.f; + beta[j] = t[j + j * t_dim1]; +/* L410: */ + } + +/* Normal Termination */ + + *info = 0; + +/* Exit (other than argument error) -- return optimal workspace size */ + +L420: + work[1] = (real) (*n); + return 0; + +/* End of SHGEQZ */ + +} /* shgeqz_ */ + diff --git a/lapack-netlib/SRC/shsein.c b/lapack-netlib/SRC/shsein.c new file mode 100644 index 000000000..f594e5089 --- /dev/null +++ b/lapack-netlib/SRC/shsein.c @@ -0,0 +1,968 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SHSEIN */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SHSEIN + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, */ +/* VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, */ +/* IFAILR, INFO ) */ + +/* CHARACTER EIGSRC, INITV, SIDE */ +/* INTEGER INFO, LDH, LDVL, LDVR, M, MM, N */ +/* LOGICAL SELECT( * ) */ +/* INTEGER IFAILL( * ), IFAILR( * ) */ +/* REAL H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), */ +/* $ WI( * ), WORK( * ), WR( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SHSEIN uses inverse iteration to find specified right and/or left */ +/* > eigenvectors of a real upper Hessenberg matrix H. */ +/* > */ +/* > The right eigenvector x and the left eigenvector y of the matrix H */ +/* > corresponding to an eigenvalue w are defined by: */ +/* > */ +/* > H * x = w * x, y**h * H = w * y**h */ +/* > */ +/* > where y**h denotes the conjugate transpose of the vector y. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'R': compute right eigenvectors only; */ +/* > = 'L': compute left eigenvectors only; */ +/* > = 'B': compute both right and left eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EIGSRC */ +/* > \verbatim */ +/* > EIGSRC is CHARACTER*1 */ +/* > Specifies the source of eigenvalues supplied in (WR,WI): */ +/* > = 'Q': the eigenvalues were found using SHSEQR; thus, if */ +/* > H has zero subdiagonal elements, and so is */ +/* > block-triangular, then the j-th eigenvalue can be */ +/* > assumed to be an eigenvalue of the block containing */ +/* > the j-th row/column. This property allows SHSEIN to */ +/* > perform inverse iteration on just one diagonal block. */ +/* > = 'N': no assumptions are made on the correspondence */ +/* > between eigenvalues and diagonal blocks. In this */ +/* > case, SHSEIN must always perform inverse iteration */ +/* > using the whole matrix H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INITV */ +/* > \verbatim */ +/* > INITV is CHARACTER*1 */ +/* > = 'N': no initial vectors are supplied; */ +/* > = 'U': user-supplied initial vectors are stored in the arrays */ +/* > VL and/or VR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SELECT */ +/* > \verbatim */ +/* > SELECT is LOGICAL array, dimension (N) */ +/* > Specifies the eigenvectors to be computed. To select the */ +/* > real eigenvector corresponding to a real eigenvalue WR(j), */ +/* > SELECT(j) must be set to .TRUE.. To select the complex */ +/* > eigenvector corresponding to a complex eigenvalue */ +/* > (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), */ +/* > either SELECT(j) or SELECT(j+1) or both must be set to */ +/* > .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is */ +/* > .FALSE.. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] H */ +/* > \verbatim */ +/* > H is REAL array, dimension (LDH,N) */ +/* > The upper Hessenberg matrix H. */ +/* > If a NaN is detected in H, the routine will return with INFO=-6. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDH */ +/* > \verbatim */ +/* > LDH is INTEGER */ +/* > The leading dimension of the array H. LDH >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] WR */ +/* > \verbatim */ +/* > WR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] WI */ +/* > \verbatim */ +/* > WI is REAL array, dimension (N) */ +/* > */ +/* > On entry, the real and imaginary parts of the eigenvalues of */ +/* > H; a complex conjugate pair of eigenvalues must be stored in */ +/* > consecutive elements of WR and WI. */ +/* > On exit, WR may have been altered since close eigenvalues */ +/* > are perturbed slightly in searching for independent */ +/* > eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VL */ +/* > \verbatim */ +/* > VL is REAL array, dimension (LDVL,MM) */ +/* > On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must */ +/* > contain starting vectors for the inverse iteration for the */ +/* > left eigenvectors; the starting vector for each eigenvector */ +/* > must be in the same column(s) in which the eigenvector will */ +/* > be stored. */ +/* > On exit, if SIDE = 'L' or 'B', the left eigenvectors */ +/* > specified by SELECT will be stored consecutively in the */ +/* > columns of VL, in the same order as their eigenvalues. A */ +/* > complex eigenvector corresponding to a complex eigenvalue is */ +/* > stored in two consecutive columns, the first holding the real */ +/* > part and the second the imaginary part. */ +/* > If SIDE = 'R', VL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVL */ +/* > \verbatim */ +/* > LDVL is INTEGER */ +/* > The leading dimension of the array VL. */ +/* > LDVL >= f2cmax(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VR */ +/* > \verbatim */ +/* > VR is REAL array, dimension (LDVR,MM) */ +/* > On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must */ +/* > contain starting vectors for the inverse iteration for the */ +/* > right eigenvectors; the starting vector for each eigenvector */ +/* > must be in the same column(s) in which the eigenvector will */ +/* > be stored. */ +/* > On exit, if SIDE = 'R' or 'B', the right eigenvectors */ +/* > specified by SELECT will be stored consecutively in the */ +/* > columns of VR, in the same order as their eigenvalues. A */ +/* > complex eigenvector corresponding to a complex eigenvalue is */ +/* > stored in two consecutive columns, the first holding the real */ +/* > part and the second the imaginary part. */ +/* > If SIDE = 'L', VR is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVR */ +/* > \verbatim */ +/* > LDVR is INTEGER */ +/* > The leading dimension of the array VR. */ +/* > LDVR >= f2cmax(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MM */ +/* > \verbatim */ +/* > MM is INTEGER */ +/* > The number of columns in the arrays VL and/or VR. MM >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of columns in the arrays VL and/or VR required to */ +/* > store the eigenvectors; each selected real eigenvector */ +/* > occupies one column and each selected complex eigenvector */ +/* > occupies two columns. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension ((N+2)*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAILL */ +/* > \verbatim */ +/* > IFAILL is INTEGER array, dimension (MM) */ +/* > If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left */ +/* > eigenvector in the i-th column of VL (corresponding to the */ +/* > eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the */ +/* > eigenvector converged satisfactorily. If the i-th and (i+1)th */ +/* > columns of VL hold a complex eigenvector, then IFAILL(i) and */ +/* > IFAILL(i+1) are set to the same value. */ +/* > If SIDE = 'R', IFAILL is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IFAILR */ +/* > \verbatim */ +/* > IFAILR is INTEGER array, dimension (MM) */ +/* > If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right */ +/* > eigenvector in the i-th column of VR (corresponding to the */ +/* > eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the */ +/* > eigenvector converged satisfactorily. If the i-th and (i+1)th */ +/* > columns of VR hold a complex eigenvector, then IFAILR(i) and */ +/* > IFAILR(i+1) are set to the same value. */ +/* > If SIDE = 'L', IFAILR is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, i is the number of eigenvectors which */ +/* > failed to converge; see IFAILL and IFAILR for further */ +/* > details. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Each eigenvector is normalized so that the element of largest */ +/* > magnitude has magnitude 1; here the magnitude of a complex number */ +/* > (x,y) is taken to be |x|+|y|. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int shsein_(char *side, char *eigsrc, char *initv, logical * + select, integer *n, real *h__, integer *ldh, real *wr, real *wi, real + *vl, integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, + real *work, integer *ifaill, integer *ifailr, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2; + real r__1, r__2; + + /* Local variables */ + logical pair; + real unfl; + integer i__, k; + extern logical lsame_(char *, char *); + integer iinfo; + logical leftv, bothv; + real hnorm; + integer kl, kr; + extern real slamch_(char *); + extern /* Subroutine */ int slaein_(logical *, logical *, integer *, real + *, integer *, real *, real *, real *, real *, real *, integer *, + real *, real *, real *, real *, integer *), xerbla_(char *, + integer *, ftnlen); + real bignum; + extern real slanhs_(char *, integer *, real *, integer *, real *); + extern logical sisnan_(real *); + logical noinit; + integer ldwork; + logical rightv, fromqr; + real smlnum; + integer kln, ksi; + real wki; + integer ksr; + real ulp, wkr, eps3; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* Decode and test the input parameters. */ + + /* Parameter adjustments */ + --select; + h_dim1 = *ldh; + h_offset = 1 + h_dim1 * 1; + h__ -= h_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1 * 1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1 * 1; + vr -= vr_offset; + --work; + --ifaill; + --ifailr; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + fromqr = lsame_(eigsrc, "Q"); + + noinit = lsame_(initv, "N"); + +/* Set M to the number of columns required to store the selected */ +/* eigenvectors, and standardize the array SELECT. */ + + *m = 0; + pair = FALSE_; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (pair) { + pair = FALSE_; + select[k] = FALSE_; + } else { + if (wi[k] == 0.f) { + if (select[k]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[k] || select[k + 1]) { + select[k] = TRUE_; + *m += 2; + } + } + } +/* L10: */ + } + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! fromqr && ! lsame_(eigsrc, "N")) { + *info = -2; + } else if (! noinit && ! lsame_(initv, "U")) { + *info = -3; + } else if (*n < 0) { + *info = -5; + } else if (*ldh < f2cmax(1,*n)) { + *info = -7; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -11; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -13; + } else if (*mm < *m) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SHSEIN", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set machine-dependent constants. */ + + unfl = slamch_("Safe minimum"); + ulp = slamch_("Precision"); + smlnum = unfl * (*n / ulp); + bignum = (1.f - ulp) / smlnum; + + ldwork = *n + 1; + + kl = 1; + kln = 0; + if (fromqr) { + kr = 0; + } else { + kr = *n; + } + ksr = 1; + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (select[k]) { + +/* Compute eigenvector(s) corresponding to W(K). */ + + if (fromqr) { + +/* If affiliation of eigenvalues is known, check whether */ +/* the matrix splits. */ + +/* Determine KL and KR such that 1 <= KL <= K <= KR <= N */ +/* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or */ +/* KR = N). */ + +/* Then inverse iteration can be performed with the */ +/* submatrix H(KL:N,KL:N) for a left eigenvector, and with */ +/* the submatrix H(1:KR,1:KR) for a right eigenvector. */ + + i__2 = kl + 1; + for (i__ = k; i__ >= i__2; --i__) { + if (h__[i__ + (i__ - 1) * h_dim1] == 0.f) { + goto L30; + } +/* L20: */ + } +L30: + kl = i__; + if (k > kr) { + i__2 = *n - 1; + for (i__ = k; i__ <= i__2; ++i__) { + if (h__[i__ + 1 + i__ * h_dim1] == 0.f) { + goto L50; + } +/* L40: */ + } +L50: + kr = i__; + } + } + + if (kl != kln) { + kln = kl; + +/* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it */ +/* has not ben computed before. */ + + i__2 = kr - kl + 1; + hnorm = slanhs_("I", &i__2, &h__[kl + kl * h_dim1], ldh, & + work[1]); + if (sisnan_(&hnorm)) { + *info = -6; + return 0; + } else if (hnorm > 0.f) { + eps3 = hnorm * ulp; + } else { + eps3 = smlnum; + } + } + +/* Perturb eigenvalue if it is close to any previous */ +/* selected eigenvalues affiliated to the submatrix */ +/* H(KL:KR,KL:KR). Close roots are modified by EPS3. */ + + wkr = wr[k]; + wki = wi[k]; +L60: + i__2 = kl; + for (i__ = k - 1; i__ >= i__2; --i__) { + if (select[i__] && (r__1 = wr[i__] - wkr, abs(r__1)) + (r__2 = + wi[i__] - wki, abs(r__2)) < eps3) { + wkr += eps3; + goto L60; + } +/* L70: */ + } + wr[k] = wkr; + + pair = wki != 0.f; + if (pair) { + ksi = ksr + 1; + } else { + ksi = ksr; + } + if (leftv) { + +/* Compute left eigenvector. */ + + i__2 = *n - kl + 1; + slaein_(&c_false, &noinit, &i__2, &h__[kl + kl * h_dim1], ldh, + &wkr, &wki, &vl[kl + ksr * vl_dim1], &vl[kl + ksi * + vl_dim1], &work[1], &ldwork, &work[*n * *n + *n + 1], + &eps3, &smlnum, &bignum, &iinfo); + if (iinfo > 0) { + if (pair) { + *info += 2; + } else { + ++(*info); + } + ifaill[ksr] = k; + ifaill[ksi] = k; + } else { + ifaill[ksr] = 0; + ifaill[ksi] = 0; + } + i__2 = kl - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + vl[i__ + ksr * vl_dim1] = 0.f; +/* L80: */ + } + if (pair) { + i__2 = kl - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + vl[i__ + ksi * vl_dim1] = 0.f; +/* L90: */ + } + } + } + if (rightv) { + +/* Compute right eigenvector. */ + + slaein_(&c_true, &noinit, &kr, &h__[h_offset], ldh, &wkr, & + wki, &vr[ksr * vr_dim1 + 1], &vr[ksi * vr_dim1 + 1], & + work[1], &ldwork, &work[*n * *n + *n + 1], &eps3, & + smlnum, &bignum, &iinfo); + if (iinfo > 0) { + if (pair) { + *info += 2; + } else { + ++(*info); + } + ifailr[ksr] = k; + ifailr[ksi] = k; + } else { + ifailr[ksr] = 0; + ifailr[ksi] = 0; + } + i__2 = *n; + for (i__ = kr + 1; i__ <= i__2; ++i__) { + vr[i__ + ksr * vr_dim1] = 0.f; +/* L100: */ + } + if (pair) { + i__2 = *n; + for (i__ = kr + 1; i__ <= i__2; ++i__) { + vr[i__ + ksi * vr_dim1] = 0.f; +/* L110: */ + } + } + } + + if (pair) { + ksr += 2; + } else { + ++ksr; + } + } +/* L120: */ + } + + return 0; + +/* End of SHSEIN */ + +} /* shsein_ */ + diff --git a/lapack-netlib/SRC/shseqr.c b/lapack-netlib/SRC/shseqr.c new file mode 100644 index 000000000..dccfeefcf --- /dev/null +++ b/lapack-netlib/SRC/shseqr.c @@ -0,0 +1,941 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SHSEQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SHSEQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, */ +/* LDZ, WORK, LWORK, INFO ) */ + +/* INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N */ +/* CHARACTER COMPZ, JOB */ +/* REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), */ +/* $ Z( LDZ, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SHSEQR computes the eigenvalues of a Hessenberg matrix H */ +/* > and, optionally, the matrices T and Z from the Schur decomposition */ +/* > H = Z T Z**T, where T is an upper quasi-triangular matrix (the */ +/* > Schur form), and Z is the orthogonal matrix of Schur vectors. */ +/* > */ +/* > Optionally Z may be postmultiplied into an input orthogonal */ +/* > matrix Q so that this routine can give the Schur factorization */ +/* > of a matrix A which has been reduced to the Hessenberg form H */ +/* > by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOB */ +/* > \verbatim */ +/* > JOB is CHARACTER*1 */ +/* > = 'E': compute eigenvalues only; */ +/* > = 'S': compute eigenvalues and the Schur form T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': no Schur vectors are computed; */ +/* > = 'I': Z is initialized to the unit matrix and the matrix Z */ +/* > of Schur vectors of H is returned; */ +/* > = 'V': Z must contain an orthogonal matrix Q on entry, and */ +/* > the product Q*Z is returned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > */ +/* > It is assumed that H is already upper triangular in rows */ +/* > and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */ +/* > set by a previous call to SGEBAL, and then passed to ZGEHRD */ +/* > when the matrix output by SGEBAL is reduced to Hessenberg */ +/* > form. Otherwise ILO and IHI should be set to 1 and N */ +/* > respectively. If N > 0, then 1 <= ILO <= IHI <= N. */ +/* > If N = 0, then ILO = 1 and IHI = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] H */ +/* > \verbatim */ +/* > H is REAL array, dimension (LDH,N) */ +/* > On entry, the upper Hessenberg matrix H. */ +/* > On exit, if INFO = 0 and JOB = 'S', then H contains the */ +/* > upper quasi-triangular matrix T from the Schur decomposition */ +/* > (the Schur form); 2-by-2 diagonal blocks (corresponding to */ +/* > complex conjugate pairs of eigenvalues) are returned in */ +/* > standard form, with H(i,i) = H(i+1,i+1) and */ +/* > H(i+1,i)*H(i,i+1) < 0. If INFO = 0 and JOB = 'E', the */ +/* > contents of H are unspecified on exit. (The output value of */ +/* > H when INFO > 0 is given under the description of INFO */ +/* > below.) */ +/* > */ +/* > Unlike earlier versions of SHSEQR, this subroutine may */ +/* > explicitly H(i,j) = 0 for i > j and j = 1, 2, ... ILO-1 */ +/* > or j = IHI+1, IHI+2, ... N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDH */ +/* > \verbatim */ +/* > LDH is INTEGER */ +/* > The leading dimension of the array H. LDH >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WR */ +/* > \verbatim */ +/* > WR is REAL array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WI */ +/* > \verbatim */ +/* > WI is REAL array, dimension (N) */ +/* > */ +/* > The real and imaginary parts, respectively, of the computed */ +/* > eigenvalues. If two eigenvalues are computed as a complex */ +/* > conjugate pair, they are stored in consecutive elements of */ +/* > WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and */ +/* > WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in */ +/* > the same order as on the diagonal of the Schur form returned */ +/* > in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 */ +/* > diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */ +/* > WI(i+1) = -WI(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is REAL array, dimension (LDZ,N) */ +/* > If COMPZ = 'N', Z is not referenced. */ +/* > If COMPZ = 'I', on entry Z need not be set and on exit, */ +/* > if INFO = 0, Z contains the orthogonal matrix Z of the Schur */ +/* > vectors of H. If COMPZ = 'V', on entry Z must contain an */ +/* > N-by-N matrix Q, which is assumed to be equal to the unit */ +/* > matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, */ +/* > if INFO = 0, Z contains Q*Z. */ +/* > Normally Q is the orthogonal matrix generated by SORGHR */ +/* > after the call to SGEHRD which formed the Hessenberg matrix */ +/* > H. (The output value of Z when INFO > 0 is given under */ +/* > the description of INFO below.) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. if COMPZ = 'I' or */ +/* > COMPZ = 'V', then LDZ >= MAX(1,N). Otherwise, LDZ >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (LWORK) */ +/* > On exit, if INFO = 0, WORK(1) returns an estimate of */ +/* > the optimal value for LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= f2cmax(1,N) */ +/* > is sufficient and delivers very good and sometimes */ +/* > optimal performance. However, LWORK as large as 11*N */ +/* > may be required for optimal performance. A workspace */ +/* > query is recommended to determine the optimal workspace */ +/* > size. */ +/* > */ +/* > If LWORK = -1, then SHSEQR does a workspace query. */ +/* > In this case, SHSEQR checks the input parameters and */ +/* > estimates the optimal workspace size for the given */ +/* > values of N, ILO and IHI. The estimate is returned */ +/* > in WORK(1). No error message related to LWORK is */ +/* > issued by XERBLA. Neither H nor Z are accessed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal */ +/* > value */ +/* > > 0: if INFO = i, SHSEQR failed to compute all of */ +/* > the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */ +/* > and WI contain those eigenvalues which have been */ +/* > successfully computed. (Failures are rare.) */ +/* > */ +/* > If INFO > 0 and JOB = 'E', then on exit, the */ +/* > remaining unconverged eigenvalues are the eigen- */ +/* > values of the upper Hessenberg matrix rows and */ +/* > columns ILO through INFO of the final, output */ +/* > value of H. */ +/* > */ +/* > If INFO > 0 and JOB = 'S', then on exit */ +/* > */ +/* > (*) (initial value of H)*U = U*(final value of H) */ +/* > */ +/* > where U is an orthogonal matrix. The final */ +/* > value of H is upper Hessenberg and quasi-triangular */ +/* > in rows and columns INFO+1 through IHI. */ +/* > */ +/* > If INFO > 0 and COMPZ = 'V', then on exit */ +/* > */ +/* > (final value of Z) = (initial value of Z)*U */ +/* > */ +/* > where U is the orthogonal matrix in (*) (regard- */ +/* > less of the value of JOB.) */ +/* > */ +/* > If INFO > 0 and COMPZ = 'I', then on exit */ +/* > (final value of Z) = U */ +/* > where U is the orthogonal matrix in (*) (regard- */ +/* > less of the value of JOB.) */ +/* > */ +/* > If INFO > 0 and COMPZ = 'N', then Z is not */ +/* > accessed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Karen Braman and Ralph Byers, Department of Mathematics, */ +/* > University of Kansas, USA */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Default values supplied by */ +/* > ILAENV(ISPEC,'SHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). */ +/* > It is suggested that these defaults be adjusted in order */ +/* > to attain best performance in each particular */ +/* > computational environment. */ +/* > */ +/* > ISPEC=12: The SLAHQR vs SLAQR0 crossover point. */ +/* > Default: 75. (Must be at least 11.) */ +/* > */ +/* > ISPEC=13: Recommended deflation window size. */ +/* > This depends on ILO, IHI and NS. NS is the */ +/* > number of simultaneous shifts returned */ +/* > by ILAENV(ISPEC=15). (See ISPEC=15 below.) */ +/* > The default for (IHI-ILO+1) <= 500 is NS. */ +/* > The default for (IHI-ILO+1) > 500 is 3*NS/2. */ +/* > */ +/* > ISPEC=14: Nibble crossover point. (See IPARMQ for */ +/* > details.) Default: 14% of deflation window */ +/* > size. */ +/* > */ +/* > ISPEC=15: Number of simultaneous shifts in a multishift */ +/* > QR iteration. */ +/* > */ +/* > If IHI-ILO+1 is ... */ +/* > */ +/* > greater than ...but less ... the */ +/* > or equal to ... than default is */ +/* > */ +/* > 1 30 NS = 2(+) */ +/* > 30 60 NS = 4(+) */ +/* > 60 150 NS = 10(+) */ +/* > 150 590 NS = ** */ +/* > 590 3000 NS = 64 */ +/* > 3000 6000 NS = 128 */ +/* > 6000 infinity NS = 256 */ +/* > */ +/* > (+) By default some or all matrices of this order */ +/* > are passed to the implicit double shift routine */ +/* > SLAHQR and this parameter is ignored. See */ +/* > ISPEC=12 above and comments in IPARMQ for */ +/* > details. */ +/* > */ +/* > (**) The asterisks (**) indicate an ad-hoc */ +/* > function of N increasing from 10 to 64. */ +/* > */ +/* > ISPEC=16: Select structured matrix multiply. */ +/* > If the number of simultaneous shifts (specified */ +/* > by ISPEC=15) is less than 14, then the default */ +/* > for ISPEC=16 is 0. Otherwise the default for */ +/* > ISPEC=16 is 2. */ +/* > \endverbatim */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* > Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */ +/* > Performance, SIAM Journal of Matrix Analysis, volume 23, pages */ +/* > 929--947, 2002. */ +/* > \n */ +/* > K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */ +/* > Algorithm Part II: Aggressive Early Deflation, SIAM Journal */ +/* > of Matrix Analysis, volume 23, pages 948--973, 2002. */ + +/* ===================================================================== */ +/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, + integer *ldz, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + real r__1; + char ch__1[2]; + + /* Local variables */ + integer kbot, nmin, i__; + extern logical lsame_(char *, char *); + logical initz; + real workl[49]; + logical wantt, wantz; + extern /* Subroutine */ int slaqr0_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, real *, integer *, integer *); + real hl[2401] /* was [49][49] */; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + logical lquery; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + +/* ==== Matrices of order NTINY or smaller must be processed by */ +/* . SLAHQR because of insufficient subdiagonal scratch space. */ +/* . (This is a hard limit.) ==== */ + +/* ==== NL allocates some local workspace to help small matrices */ +/* . through a rare SLAHQR failure. NL > NTINY = 15 is */ +/* . required and NL <= NMIN = ILAENV(ISPEC=12,...) is recom- */ +/* . mended. (The default value of NMIN is 75.) Using NL = 49 */ +/* . allows up to six simultaneous shifts and a 16-by-16 */ +/* . deflation window. ==== */ + +/* ==== Decode and check the input parameters. ==== */ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1 * 1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1 * 1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + work[1] = (real) f2cmax(1,*n); + lquery = *lwork == -1; + + *info = 0; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > f2cmax(1,*n)) { + *info = -4; + } else if (*ihi < f2cmin(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < f2cmax(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < f2cmax(1,*n)) { + *info = -11; + } else if (*lwork < f2cmax(1,*n) && ! lquery) { + *info = -13; + } + + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + + i__1 = -(*info); + xerbla_("SHSEQR", &i__1, (ftnlen)6); + return 0; + + } else if (*n == 0) { + +/* ==== Quick return in case N = 0; nothing to do. ==== */ + + return 0; + + } else if (lquery) { + +/* ==== Quick return in case of a workspace query ==== */ + + slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); +/* ==== Ensure reported workspace size is backward-compatible with */ +/* . previous LAPACK versions. ==== */ +/* Computing MAX */ + r__1 = (real) f2cmax(1,*n); + work[1] = f2cmax(r__1,work[1]); + return 0; + + } else { + +/* ==== copy eigenvalues isolated by SGEBAL ==== */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L20: */ + } + +/* ==== Initialize Z, if requested ==== */ + + if (initz) { + slaset_("A", n, n, &c_b11, &c_b12, &z__[z_offset], ldz) + ; + } + +/* ==== Quick return if possible ==== */ + + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; + } + +/* ==== SLAHQR/SLAQR0 crossover point ==== */ + +/* Writing concatenation */ + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "SHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = f2cmax(15,nmin); + +/* ==== SLAQR0 for big matrices; SLAHQR for small ones ==== */ + + if (*n > nmin) { + slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, + info); + } else { + +/* ==== Small matrix ==== */ + + slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, info); + + if (*info > 0) { + +/* ==== A rare SLAHQR failure! SLAQR0 sometimes succeeds */ +/* . when SLAHQR fails. ==== */ + + kbot = *info; + + if (*n >= 49) { + +/* ==== Larger matrices have enough subdiagonal scratch */ +/* . space to call SLAQR0 directly. ==== */ + + slaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], + ldz, &work[1], lwork, info); + + } else { + +/* ==== Tiny matrices don't have enough subdiagonal */ +/* . scratch space to benefit from SLAQR0. Hence, */ +/* . tiny matrices must be copied into a larger */ +/* . array before calling SLAQR0. ==== */ + + slacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + hl[*n + 1 + *n * 49 - 50] = 0.f; + i__1 = 49 - *n; + slaset_("A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) * + 49 - 49], &c__49); + slaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, + workl, &c__49, info); + if (wantt || *info != 0) { + slacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } + } + } + } + +/* ==== Clear out the trash, if necessary. ==== */ + + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + slaset_("L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh); + } + +/* ==== Ensure reported workspace size is backward-compatible with */ +/* . previous LAPACK versions. ==== */ + +/* Computing MAX */ + r__1 = (real) f2cmax(1,*n); + work[1] = f2cmax(r__1,work[1]); + } + +/* ==== End of SHSEQR ==== */ + + return 0; +} /* shseqr_ */ + diff --git a/lapack-netlib/SRC/sisnan.c b/lapack-netlib/SRC/sisnan.c new file mode 100644 index 000000000..689e199fe --- /dev/null +++ b/lapack-netlib/SRC/sisnan.c @@ -0,0 +1,469 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SISNAN tests input for NaN. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SISNAN + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* LOGICAL FUNCTION SISNAN( SIN ) */ + +/* REAL SIN */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SISNAN returns .TRUE. if its argument is NaN, and .FALSE. */ +/* > otherwise. To be replaced by the Fortran 2003 intrinsic in the */ +/* > future. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIN */ +/* > \verbatim */ +/* > SIN is REAL */ +/* > Input to test for NaN. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +logical sisnan_(real *sin__) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + extern logical slaisnan_(real *, real *); + + +/* -- LAPACK auxiliary routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + ret_val = slaisnan_(sin__, sin__); + return ret_val; +} /* sisnan_ */ + diff --git a/lapack-netlib/SRC/sla_gbamv.c b/lapack-netlib/SRC/sla_gbamv.c new file mode 100644 index 000000000..4cc9a4596 --- /dev/null +++ b/lapack-netlib/SRC/sla_gbamv.c @@ -0,0 +1,815 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GBAMV performs a matrix-vector operation to calculate error bounds. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GBAMV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, */ +/* INCX, BETA, Y, INCY ) */ + +/* REAL ALPHA, BETA */ +/* INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS */ +/* REAL AB( LDAB, * ), X( * ), Y( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GBAMV performs one of the matrix-vector operations */ +/* > */ +/* > y := alpha*abs(A)*abs(x) + beta*abs(y), */ +/* > or y := alpha*abs(A)**T*abs(x) + beta*abs(y), */ +/* > */ +/* > where alpha and beta are scalars, x and y are vectors and A is an */ +/* > m by n matrix. */ +/* > */ +/* > This function is primarily used in calculating error bounds. */ +/* > To protect against underflow during evaluation, components in */ +/* > the resulting vector are perturbed away from zero by (N+1) */ +/* > times the underflow threshold. To prevent unnecessarily large */ +/* > errors for block-structure embedded in general matrices, */ +/* > "symbolically" zero components are not perturbed. A zero */ +/* > entry is considered "symbolic" if all multiplications involved */ +/* > in computing that entry have at least one zero multiplicand. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is INTEGER */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) */ +/* > BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) */ +/* > BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) */ +/* > */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix A. */ +/* > M must be at least zero. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix A. */ +/* > N must be at least zero. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is REAL */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is REAL array, dimension ( LDAB, n ) */ +/* > Before entry, the leading m by n part of the array AB must */ +/* > contain the matrix of coefficients. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > On entry, LDA specifies the first dimension of AB as declared */ +/* > in the calling (sub) program. LDAB must be at least */ +/* > f2cmax( 1, m ). */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is REAL array, dimension */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* > Before entry, the incremented array X must contain the */ +/* > vector x. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is REAL */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then Y need not be set on input. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is REAL array, dimension */ +/* > ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* > Before entry with BETA non-zero, the incremented array Y */ +/* > must contain the vector y. On exit, Y is overwritten by the */ +/* > updated vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > Unchanged on exit. */ +/* > */ +/* > Level 2 Blas routine. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sla_gbamv_(integer *trans, integer *m, integer *n, + integer *kl, integer *ku, real *alpha, real *ab, integer *ldab, real * + x, integer *incx, real *beta, real *y, integer *incy) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; + real r__1; + + /* Local variables */ + integer info; + real temp; + integer lenx, leny; + extern integer ilatrans_(char *); + real safe1; + integer i__, j; + logical symb_zero__; + integer kd, ke, iy, jx, kx, ky; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! (*trans == ilatrans_("N") || *trans == ilatrans_("T") || *trans == ilatrans_("C"))) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*kl < 0 || *kl > *m - 1) { + info = 4; + } else if (*ku < 0 || *ku > *n - 1) { + info = 5; + } else if (*ldab < *kl + *ku + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SLA_GBAMV ", &info, (ftnlen)10); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (*trans == ilatrans_("N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Set SAFE1 essentially to be the underflow threshold times the */ +/* number of additions in each row. */ + + safe1 = slamch_("Safe minimum"); + safe1 = (*n + 1) * safe1; + +/* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ + +/* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */ +/* the inexact flag. Still doesn't help change the iteration order */ +/* to per-column. */ + + kd = *ku + 1; + ke = *kl + 1; + iy = ky; + if (*incx == 1) { + if (*trans == ilatrans_("N")) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = f2cmin(i__4,lenx); + for (j = f2cmax(i__2,1); j <= i__3; ++j) { + temp = (r__1 = ab[kd + i__ - j + j * ab_dim1], abs( + r__1)); + symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = f2cmin(i__4,lenx); + for (j = f2cmax(i__3,1); j <= i__2; ++j) { + temp = (r__1 = ab[ke - i__ + j + i__ * ab_dim1], abs( + r__1)); + symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } + } else { + if (*trans == ilatrans_("N")) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { + jx = kx; +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = f2cmin(i__4,lenx); + for (j = f2cmax(i__2,1); j <= i__3; ++j) { + temp = (r__1 = ab[kd + i__ - j + j * ab_dim1], abs( + r__1)); + symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp; + jx += *incx; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { + jx = kx; +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = f2cmin(i__4,lenx); + for (j = f2cmax(i__3,1); j <= i__2; ++j) { + temp = (r__1 = ab[ke - i__ + j + i__ * ab_dim1], abs( + r__1)); + symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp; + jx += *incx; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } + } + + return 0; + +/* End of SLA_GBAMV */ + +} /* sla_gbamv__ */ + diff --git a/lapack-netlib/SRC/sla_gbrcond.c b/lapack-netlib/SRC/sla_gbrcond.c new file mode 100644 index 000000000..4cb9a4da6 --- /dev/null +++ b/lapack-netlib/SRC/sla_gbrcond.c @@ -0,0 +1,791 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GBRCOND estimates the Skeel condition number for a general banded matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GBRCOND + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* REAL FUNCTION SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, LDAFB, */ +/* IPIV, CMODE, C, INFO, WORK, IWORK ) */ + +/* CHARACTER TRANS */ +/* INTEGER N, LDAB, LDAFB, INFO, KL, KU, CMODE */ +/* INTEGER IWORK( * ), IPIV( * ) */ +/* REAL AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), */ +/* $ C( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GBRCOND Estimates the Skeel condition number of op(A) * op2(C) */ +/* > where op2 is determined by CMODE as follows */ +/* > CMODE = 1 op2(C) = C */ +/* > CMODE = 0 op2(C) = I */ +/* > CMODE = -1 op2(C) = inv(C) */ +/* > The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ +/* > is computed by computing scaling factors R such that */ +/* > diag(R)*A*op2(C) is row equilibrated and computing the standard */ +/* > infinity-norm condition number. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is REAL array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AFB */ +/* > \verbatim */ +/* > AFB is REAL array, dimension (LDAFB,N) */ +/* > Details of the LU factorization of the band matrix A, as */ +/* > computed by SGBTRF. U is stored as an upper triangular */ +/* > band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* > and the multipliers used during the factorization are stored */ +/* > in rows KL+KU+2 to 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAFB */ +/* > \verbatim */ +/* > LDAFB is INTEGER */ +/* > The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from the factorization A = P*L*U */ +/* > as computed by SGBTRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CMODE */ +/* > \verbatim */ +/* > CMODE is INTEGER */ +/* > Determines op2(C) in the formula op(A) * op2(C) as follows: */ +/* > CMODE = 1 op2(C) = C */ +/* > CMODE = 0 op2(C) = I */ +/* > CMODE = -1 op2(C) = inv(C) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > The vector C in the formula op(A) * op2(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. */ +/* > i > 0: The ith argument is invalid. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (5*N). */ +/* > Workspace. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N). */ +/* > Workspace. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGBcomputational */ + +/* ===================================================================== */ +real sla_gbrcond_(char *trans, integer *n, integer *kl, integer *ku, real * + ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, integer * + cmode, real *c__, integer *info, real *work, integer *iwork) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4; + real ret_val, r__1; + + /* Local variables */ + integer kase, i__, j; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, + real *, integer *, integer *); + integer kd, ke; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + real ainvnm; + extern /* Subroutine */ int sgbtrs_(char *, integer *, integer *, integer + *, integer *, real *, integer *, integer *, real *, integer *, + integer *); + real tmp; + logical notrans; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1 * 1; + afb -= afb_offset; + --ipiv; + --c__; + --work; + --iwork; + + /* Function Body */ + ret_val = 0.f; + + *info = 0; + notrans = lsame_(trans, "N"); + if (! notrans && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*kl < 0 || *kl > *n - 1) { + *info = -3; + } else if (*ku < 0 || *ku > *n - 1) { + *info = -4; + } else if (*ldab < *kl + *ku + 1) { + *info = -6; + } else if (*ldafb < (*kl << 1) + *ku + 1) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLA_GBRCOND", &i__1, (ftnlen)11); + return ret_val; + } + if (*n == 0) { + ret_val = 1.f; + return ret_val; + } + +/* Compute the equilibration matrix R such that */ +/* inv(R)*A*C has unit 1-norm. */ + + kd = *ku + 1; + ke = *kl + 1; + if (notrans) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.f; + if (*cmode == 1) { +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = f2cmin(i__4,*n); + for (j = f2cmax(i__2,1); j <= i__3; ++j) { + tmp += (r__1 = ab[kd + i__ - j + j * ab_dim1] * c__[j], + abs(r__1)); + } + } else if (*cmode == 0) { +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = f2cmin(i__4,*n); + for (j = f2cmax(i__3,1); j <= i__2; ++j) { + tmp += (r__1 = ab[kd + i__ - j + j * ab_dim1], abs(r__1)); + } + } else { +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = f2cmin(i__4,*n); + for (j = f2cmax(i__2,1); j <= i__3; ++j) { + tmp += (r__1 = ab[kd + i__ - j + j * ab_dim1] / c__[j], + abs(r__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.f; + if (*cmode == 1) { +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = f2cmin(i__4,*n); + for (j = f2cmax(i__3,1); j <= i__2; ++j) { + tmp += (r__1 = ab[ke - i__ + j + i__ * ab_dim1] * c__[j], + abs(r__1)); + } + } else if (*cmode == 0) { +/* Computing MAX */ + i__2 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__3 = f2cmin(i__4,*n); + for (j = f2cmax(i__2,1); j <= i__3; ++j) { + tmp += (r__1 = ab[ke - i__ + j + i__ * ab_dim1], abs(r__1) + ); + } + } else { +/* Computing MAX */ + i__3 = i__ - *kl; +/* Computing MIN */ + i__4 = i__ + *ku; + i__2 = f2cmin(i__4,*n); + for (j = f2cmax(i__3,1); j <= i__2; ++j) { + tmp += (r__1 = ab[ke - i__ + j + i__ * ab_dim1] / c__[j], + abs(r__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } + +/* Estimate the norm of inv(op(A)). */ + + ainvnm = 0.f; + kase = 0; +L10: + slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == 2) { + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + if (notrans) { + sgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } else { + sgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } + +/* Multiply by inv(C). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + } else { + +/* Multiply by inv(C**T). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + if (notrans) { + sgbtrs_("Transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } else { + sgbtrs_("No transpose", n, kl, ku, &c__1, &afb[afb_offset], + ldafb, &ipiv[1], &work[1], n, info); + } + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + ret_val = 1.f / ainvnm; + } + + return ret_val; + +} /* sla_gbrcond__ */ + diff --git a/lapack-netlib/SRC/sla_gbrfsx_extended.c b/lapack-netlib/SRC/sla_gbrfsx_extended.c new file mode 100644 index 000000000..7a4c06b85 --- /dev/null +++ b/lapack-netlib/SRC/sla_gbrfsx_extended.c @@ -0,0 +1,1135 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GBRFSX_EXTENDED improves the computed solution to a system of linear equations for general +banded matrices by performing extra-precise iterative refinement and provides error bounds and backwar +d error estimates for the solution. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GBRFSX_EXTENDED + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, */ +/* NRHS, AB, LDAB, AFB, LDAFB, IPIV, */ +/* COLEQU, C, B, LDB, Y, LDY, */ +/* BERR_OUT, N_NORMS, ERR_BNDS_NORM, */ +/* ERR_BNDS_COMP, RES, AYB, DY, */ +/* Y_TAIL, RCOND, ITHRESH, RTHRESH, */ +/* DZ_UB, IGNORE_CWISE, INFO ) */ + +/* INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, */ +/* $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH */ +/* LOGICAL COLEQU, IGNORE_CWISE */ +/* REAL RTHRESH, DZ_UB */ +/* INTEGER IPIV( * ) */ +/* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), */ +/* $ Y( LDY, * ), RES(*), DY(*), Y_TAIL(*) */ +/* REAL C( * ), AYB(*), RCOND, BERR_OUT(*), */ +/* $ ERR_BNDS_NORM( NRHS, * ), */ +/* $ ERR_BNDS_COMP( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GBRFSX_EXTENDED improves the computed solution to a system of */ +/* > linear equations by performing extra-precise iterative refinement */ +/* > and provides error bounds and backward error estimates for the solution. */ +/* > This subroutine is called by SGBRFSX to perform iterative refinement. */ +/* > In addition to normwise error bound, the code provides maximum */ +/* > componentwise error bound if possible. See comments for ERR_BNDS_NORM */ +/* > and ERR_BNDS_COMP for details of the error bounds. Note that this */ +/* > subroutine is only resonsible for setting the second fields of */ +/* > ERR_BNDS_NORM and ERR_BNDS_COMP. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] PREC_TYPE */ +/* > \verbatim */ +/* > PREC_TYPE is INTEGER */ +/* > Specifies the intermediate precision to be used in refinement. */ +/* > The value is defined by ILAPREC(P) where P is a CHARACTER and P */ +/* > = 'S': Single */ +/* > = 'D': Double */ +/* > = 'I': Indigenous */ +/* > = 'X' or 'E': Extra */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS_TYPE */ +/* > \verbatim */ +/* > TRANS_TYPE is INTEGER */ +/* > Specifies the transposition operation on A. */ +/* > The value is defined by ILATRANS(T) where T is a CHARACTER and T */ +/* > = 'N': No transpose */ +/* > = 'T': Transpose */ +/* > = 'C': Conjugate transpose */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0 */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right-hand-sides, i.e., the number of columns of the */ +/* > matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is REAL array, dimension (LDAB,N) */ +/* > On entry, the N-by-N matrix AB. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AFB */ +/* > \verbatim */ +/* > AFB is REAL array, dimension (LDAFB,N) */ +/* > The factors L and U from the factorization */ +/* > A = P*L*U as computed by SGBTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAFB */ +/* > \verbatim */ +/* > LDAFB is INTEGER */ +/* > The leading dimension of the array AF. LDAFB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from the factorization A = P*L*U */ +/* > as computed by SGBTRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COLEQU */ +/* > \verbatim */ +/* > COLEQU is LOGICAL */ +/* > If .TRUE. then column equilibration was done to A before calling */ +/* > this routine. This is needed to compute the solution and error */ +/* > bounds correctly. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > The column scale factors for A. If COLEQU = .FALSE., C */ +/* > is not accessed. If C is input, each element of C should be a power */ +/* > of the radix to ensure a reliable solution and error estimates. */ +/* > Scaling by powers of the radix does not cause rounding errors unless */ +/* > the result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > The right-hand-side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is REAL array, dimension (LDY,NRHS) */ +/* > On entry, the solution matrix X, as computed by SGBTRS. */ +/* > On exit, the improved solution matrix Y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDY */ +/* > \verbatim */ +/* > LDY is INTEGER */ +/* > The leading dimension of the array Y. LDY >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR_OUT */ +/* > \verbatim */ +/* > BERR_OUT is REAL array, dimension (NRHS) */ +/* > On exit, BERR_OUT(j) contains the componentwise relative backward */ +/* > error for right-hand-side j from the formula */ +/* > f2cmax(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* > where abs(Z) is the componentwise absolute value of the matrix */ +/* > or vector Z. This is computed by SLA_LIN_BERR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_NORMS */ +/* > \verbatim */ +/* > N_NORMS is INTEGER */ +/* > Determines which error bounds to return (see ERR_BNDS_NORM */ +/* > and ERR_BNDS_COMP). */ +/* > If N_NORMS >= 1 return normwise error bounds. */ +/* > If N_NORMS >= 2 return componentwise error bounds. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ERR_BNDS_NORM */ +/* > \verbatim */ +/* > ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_NORM(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > This subroutine is only responsible for setting the second field */ +/* > above. */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ERR_BNDS_COMP */ +/* > \verbatim */ +/* > ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERR_BNDS_COMP is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERR_BNDS_COMP(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > This subroutine is only responsible for setting the second field */ +/* > above. */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RES */ +/* > \verbatim */ +/* > RES is REAL array, dimension (N) */ +/* > Workspace to hold the intermediate residual. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AYB */ +/* > \verbatim */ +/* > AYB is REAL array, dimension (N) */ +/* > Workspace. This can be the same workspace passed for Y_TAIL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DY */ +/* > \verbatim */ +/* > DY is REAL array, dimension (N) */ +/* > Workspace to hold the intermediate solution. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y_TAIL */ +/* > \verbatim */ +/* > Y_TAIL is REAL array, dimension (N) */ +/* > Workspace to hold the trailing bits of the intermediate solution. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ITHRESH */ +/* > \verbatim */ +/* > ITHRESH is INTEGER */ +/* > The maximum number of residual computations allowed for */ +/* > refinement. The default is 10. For 'aggressive' set to 100 to */ +/* > permit convergence using approximate factorizations or */ +/* > factorizations other than LU. If the factorization uses a */ +/* > technique other than Gaussian elimination, the guarantees in */ +/* > ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RTHRESH */ +/* > \verbatim */ +/* > RTHRESH is REAL */ +/* > Determines when to stop refinement if the error estimate stops */ +/* > decreasing. Refinement will stop when the next solution no longer */ +/* > satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ +/* > the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ +/* > default value is 0.5. For 'aggressive' set to 0.9 to permit */ +/* > convergence on extremely ill-conditioned matrices. See LAWN 165 */ +/* > for more details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DZ_UB */ +/* > \verbatim */ +/* > DZ_UB is REAL */ +/* > Determines when to start considering componentwise convergence. */ +/* > Componentwise convergence is only considered after each component */ +/* > of the solution Y is stable, which we definte as the relative */ +/* > change in each component being less than DZ_UB. The default value */ +/* > is 0.25, requiring the first bit to be stable. See LAWN 165 for */ +/* > more details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IGNORE_CWISE */ +/* > \verbatim */ +/* > IGNORE_CWISE is LOGICAL */ +/* > If .TRUE. then ignore componentwise convergence. Default value */ +/* > is .FALSE.. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. */ +/* > < 0: if INFO = -i, the ith argument to SGBTRS had an illegal */ +/* > value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sla_gbrfsx_extended_(integer *prec_type__, integer * + trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, + real *ab, integer *ldab, real *afb, integer *ldafb, integer *ipiv, + logical *colequ, real *c__, real *b, integer *ldb, real *y, integer * + ldy, real *berr_out__, integer *n_norms__, real *err_bnds_norm__, + real *err_bnds_comp__, real *res, real *ayb, real *dy, real *y_tail__, + real *rcond, integer *ithresh, real *rthresh, real *dz_ub__, logical + *ignore_cwise__, integer *info) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, + y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, + err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; + real r__1, r__2; + char ch__1[1]; + + /* Local variables */ + real dx_x__, dz_z__, ymin; + extern /* Subroutine */ int sla_lin_berr_(integer *, integer *, integer * + , real *, real *, real *), blas_sgbmv_x__(integer *, integer *, + integer *, integer *, integer *, real *, real *, integer *, real * + , integer *, real *, real *, integer *, integer *); + real dxratmax, dzratmax; + integer y_prec_state__, i__, j, m; + extern /* Subroutine */ int blas_sgbmv2_x_(integer *, integer *, integer + *, integer *, integer *, real *, real *, integer *, real *, real * + , integer *, real *, real *, integer *, integer *), sla_gbamv__( + integer *, integer *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), + sgbmv_(char *, integer *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + real dxrat; + logical incr_prec__; + real dzrat; + char trans[1]; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + real normx, normy; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *); + real myhugeval, prev_dz_z__, yk; + extern real slamch_(char *); + real final_dx_x__, final_dz_z__; + extern /* Subroutine */ int sgbtrs_(char *, integer *, integer *, integer + *, integer *, real *, integer *, integer *, real *, integer *, + integer *); + real normdx; + extern /* Subroutine */ int sla_wwaddw_(integer *, real *, real *, real * + ); + extern /* Character */ VOID chla_transtype_(char *, integer *); + real prevnormdx; + integer cnt; + real dyk, eps; + integer x_state__, z_state__; + real incr_thresh__; + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + err_bnds_comp_dim1 = *nrhs; + err_bnds_comp_offset = 1 + err_bnds_comp_dim1 * 1; + err_bnds_comp__ -= err_bnds_comp_offset; + err_bnds_norm_dim1 = *nrhs; + err_bnds_norm_offset = 1 + err_bnds_norm_dim1 * 1; + err_bnds_norm__ -= err_bnds_norm_offset; + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1 * 1; + afb -= afb_offset; + --ipiv; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --berr_out__; + --res; + --ayb; + --dy; + --y_tail__; + + /* Function Body */ + if (*info != 0) { + return 0; + } + chla_transtype_(ch__1, trans_type__); + *(unsigned char *)trans = *(unsigned char *)&ch__1[0]; + eps = slamch_("Epsilon"); + myhugeval = slamch_("Overflow"); +/* Force MYHUGEVAL to Inf */ + myhugeval *= myhugeval; +/* Using MYHUGEVAL may lead to spurious underflows. */ + incr_thresh__ = (real) (*n) * eps; + m = *kl + *ku + 1; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + y_prec_state__ = 1; + if (y_prec_state__ == 2) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + y_tail__[i__] = 0.f; + } + } + dxrat = 0.f; + dxratmax = 0.f; + dzrat = 0.f; + dzratmax = 0.f; + final_dx_x__ = myhugeval; + final_dz_z__ = myhugeval; + prevnormdx = myhugeval; + prev_dz_z__ = myhugeval; + dz_z__ = myhugeval; + dx_x__ = myhugeval; + x_state__ = 1; + z_state__ = 0; + incr_prec__ = FALSE_; + i__2 = *ithresh; + for (cnt = 1; cnt <= i__2; ++cnt) { + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + + scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + if (y_prec_state__ == 0) { + sgbmv_(trans, &m, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[ + j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); + } else if (y_prec_state__ == 1) { + blas_sgbmv_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ + ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, & + res[1], &c__1, prec_type__); + } else { + blas_sgbmv2_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ + ab_offset], ldab, &y[j * y_dim1 + 1], &y_tail__[1], & + c__1, &c_b8, &res[1], &c__1, prec_type__); + } +/* XXX: RES is no longer needed. */ + scopy_(n, &res[1], &c__1, &dy[1], &c__1); + sgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] + , &dy[1], n, info); + +/* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ + + normx = 0.f; + normy = 0.f; + normdx = 0.f; + dz_z__ = 0.f; + ymin = myhugeval; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + yk = (r__1 = y[i__ + j * y_dim1], abs(r__1)); + dyk = (r__1 = dy[i__], abs(r__1)); + if (yk != 0.f) { +/* Computing MAX */ + r__1 = dz_z__, r__2 = dyk / yk; + dz_z__ = f2cmax(r__1,r__2); + } else if (dyk != 0.f) { + dz_z__ = myhugeval; + } + ymin = f2cmin(ymin,yk); + normy = f2cmax(normy,yk); + if (*colequ) { +/* Computing MAX */ + r__1 = normx, r__2 = yk * c__[i__]; + normx = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = normdx, r__2 = dyk * c__[i__]; + normdx = f2cmax(r__1,r__2); + } else { + normx = normy; + normdx = f2cmax(normdx,dyk); + } + } + if (normx != 0.f) { + dx_x__ = normdx / normx; + } else if (normdx == 0.f) { + dx_x__ = 0.f; + } else { + dx_x__ = myhugeval; + } + dxrat = normdx / prevnormdx; + dzrat = dz_z__ / prev_dz_z__; + +/* Check termination criteria. */ + + if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy + && y_prec_state__ < 2) { + incr_prec__ = TRUE_; + } + if (x_state__ == 3 && dxrat <= *rthresh) { + x_state__ = 1; + } + if (x_state__ == 1) { + if (dx_x__ <= eps) { + x_state__ = 2; + } else if (dxrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + x_state__ = 3; + } + } else { + if (dxrat > dxratmax) { + dxratmax = dxrat; + } + } + if (x_state__ > 1) { + final_dx_x__ = dx_x__; + } + } + if (z_state__ == 0 && dz_z__ <= *dz_ub__) { + z_state__ = 1; + } + if (z_state__ == 3 && dzrat <= *rthresh) { + z_state__ = 1; + } + if (z_state__ == 1) { + if (dz_z__ <= eps) { + z_state__ = 2; + } else if (dz_z__ > *dz_ub__) { + z_state__ = 0; + dzratmax = 0.f; + final_dz_z__ = myhugeval; + } else if (dzrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + z_state__ = 3; + } + } else { + if (dzrat > dzratmax) { + dzratmax = dzrat; + } + } + if (z_state__ > 1) { + final_dz_z__ = dz_z__; + } + } + +/* Exit if both normwise and componentwise stopped working, */ +/* but if componentwise is unstable, let it go at least two */ +/* iterations. */ + + if (x_state__ != 1) { + if (*ignore_cwise__) { + goto L666; + } + if (z_state__ == 3 || z_state__ == 2) { + goto L666; + } + if (z_state__ == 0 && cnt > 1) { + goto L666; + } + } + if (incr_prec__) { + incr_prec__ = FALSE_; + ++y_prec_state__; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + y_tail__[i__] = 0.f; + } + } + prevnormdx = normdx; + prev_dz_z__ = dz_z__; + +/* Update soluton. */ + + if (y_prec_state__ < 2) { + saxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); + } else { + sla_wwaddw_(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); + } + } +/* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't CALL MYEXIT. */ +L666: + +/* Set final_* when cnt hits ithresh. */ + + if (x_state__ == 1) { + final_dx_x__ = dx_x__; + } + if (z_state__ == 1) { + final_dz_z__ = dz_z__; + } + +/* Compute error bounds. */ + + if (*n_norms__ >= 1) { + err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( + 1 - dxratmax); + } + if (*n_norms__ >= 2) { + err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( + 1 - dzratmax); + } + +/* Compute componentwise relative backward error from formula */ +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. */ + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + + scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + sgbmv_(trans, n, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[j * + y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + ayb[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)); + } + +/* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ + + sla_gbamv_(trans_type__, n, n, kl, ku, &c_b8, &ab[ab_offset], ldab, & + y[j * y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1); + sla_lin_berr_(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); + +/* End of loop for each RHS */ + + } + + return 0; +} /* sla_gbrfsx_extended__ */ + diff --git a/lapack-netlib/SRC/sla_gbrpvgrw.c b/lapack-netlib/SRC/sla_gbrpvgrw.c new file mode 100644 index 000000000..11debe424 --- /dev/null +++ b/lapack-netlib/SRC/sla_gbrpvgrw.c @@ -0,0 +1,569 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GBRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a general banded m +atrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GBRPVGRW + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* REAL FUNCTION SLA_GBRPVGRW( N, KL, KU, NCOLS, AB, LDAB, AFB, */ +/* LDAFB ) */ + +/* INTEGER N, KL, KU, NCOLS, LDAB, LDAFB */ +/* REAL AB( LDAB, * ), AFB( LDAFB, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GBRPVGRW computes the reciprocal pivot growth factor */ +/* > norm(A)/norm(U). The "f2cmax absolute element" norm is used. If this is */ +/* > much less than 1, the stability of the LU factorization of the */ +/* > (equilibrated) matrix A could be poor. This also means that the */ +/* > solution X, estimated condition numbers, and error bounds could be */ +/* > unreliable. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The number of subdiagonals within the band of A. KL >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The number of superdiagonals within the band of A. KU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCOLS */ +/* > \verbatim */ +/* > NCOLS is INTEGER */ +/* > The number of columns of the matrix A. NCOLS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AB */ +/* > \verbatim */ +/* > AB is REAL array, dimension (LDAB,N) */ +/* > On entry, the matrix A in band storage, in rows 1 to KL+KU+1. */ +/* > The j-th column of A is stored in the j-th column of the */ +/* > array AB as follows: */ +/* > AB(KU+1+i-j,j) = A(i,j) for f2cmax(1,j-KU)<=i<=f2cmin(N,j+kl) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAB */ +/* > \verbatim */ +/* > LDAB is INTEGER */ +/* > The leading dimension of the array AB. LDAB >= KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AFB */ +/* > \verbatim */ +/* > AFB is REAL array, dimension (LDAFB,N) */ +/* > Details of the LU factorization of the band matrix A, as */ +/* > computed by SGBTRF. U is stored as an upper triangular */ +/* > band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, */ +/* > and the multipliers used during the factorization are stored */ +/* > in rows KL+KU+2 to 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAFB */ +/* > \verbatim */ +/* > LDAFB is INTEGER */ +/* > The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGBcomputational */ + +/* ===================================================================== */ +real sla_gbrpvgrw_(integer *n, integer *kl, integer *ku, integer *ncols, + real *ab, integer *ldab, real *afb, integer *ldafb) +{ + /* System generated locals */ + integer ab_dim1, ab_offset, afb_dim1, afb_offset, i__1, i__2, i__3, i__4; + real ret_val, r__1, r__2; + + /* Local variables */ + real amax, umax; + integer i__, j, kd; + real rpvgrw; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + ab_dim1 = *ldab; + ab_offset = 1 + ab_dim1 * 1; + ab -= ab_offset; + afb_dim1 = *ldafb; + afb_offset = 1 + afb_dim1 * 1; + afb -= afb_offset; + + /* Function Body */ + rpvgrw = 1.f; + kd = *ku + 1; + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + amax = 0.f; + umax = 0.f; +/* Computing MAX */ + i__2 = j - *ku; +/* Computing MIN */ + i__4 = j + *kl; + i__3 = f2cmin(i__4,*n); + for (i__ = f2cmax(i__2,1); i__ <= i__3; ++i__) { +/* Computing MAX */ + r__2 = (r__1 = ab[kd + i__ - j + j * ab_dim1], abs(r__1)); + amax = f2cmax(r__2,amax); + } +/* Computing MAX */ + i__3 = j - *ku; + i__2 = j; + for (i__ = f2cmax(i__3,1); i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = (r__1 = afb[kd + i__ - j + j * afb_dim1], abs(r__1)); + umax = f2cmax(r__2,umax); + } + if (umax != 0.f) { +/* Computing MIN */ + r__1 = amax / umax; + rpvgrw = f2cmin(r__1,rpvgrw); + } + } + ret_val = rpvgrw; + return ret_val; +} /* sla_gbrpvgrw__ */ + diff --git a/lapack-netlib/SRC/sla_geamv.c b/lapack-netlib/SRC/sla_geamv.c new file mode 100644 index 000000000..e841a7ad2 --- /dev/null +++ b/lapack-netlib/SRC/sla_geamv.c @@ -0,0 +1,779 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GEAMV computes a matrix-vector product using a general matrix to calculate error bounds. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GEAMV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, */ +/* Y, INCY ) */ + +/* REAL ALPHA, BETA */ +/* INTEGER INCX, INCY, LDA, M, N, TRANS */ +/* REAL A( LDA, * ), X( * ), Y( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GEAMV performs one of the matrix-vector operations */ +/* > */ +/* > y := alpha*abs(A)*abs(x) + beta*abs(y), */ +/* > or y := alpha*abs(A)**T*abs(x) + beta*abs(y), */ +/* > */ +/* > where alpha and beta are scalars, x and y are vectors and A is an */ +/* > m by n matrix. */ +/* > */ +/* > This function is primarily used in calculating error bounds. */ +/* > To protect against underflow during evaluation, components in */ +/* > the resulting vector are perturbed away from zero by (N+1) */ +/* > times the underflow threshold. To prevent unnecessarily large */ +/* > errors for block-structure embedded in general matrices, */ +/* > "symbolically" zero components are not perturbed. A zero */ +/* > entry is considered "symbolic" if all multiplications involved */ +/* > in computing that entry have at least one zero multiplicand. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is INTEGER */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) */ +/* > BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) */ +/* > BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) */ +/* > */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix A. */ +/* > M must be at least zero. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix A. */ +/* > N must be at least zero. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is REAL */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension ( LDA, n ) */ +/* > Before entry, the leading m by n part of the array A must */ +/* > contain the matrix of coefficients. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > f2cmax( 1, m ). */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is REAL array, dimension */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* > Before entry, the incremented array X must contain the */ +/* > vector x. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is REAL */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then Y need not be set on input. */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is REAL array, */ +/* > dimension at least */ +/* > ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* > Before entry with BETA non-zero, the incremented array Y */ +/* > must contain the vector y. On exit, Y is overwritten by the */ +/* > updated vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > Unchanged on exit. */ +/* > */ +/* > Level 2 Blas routine. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date June 2017 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sla_geamv_(integer *trans, integer *m, integer *n, real + *alpha, real *a, integer *lda, real *x, integer *incx, real *beta, + real *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + integer info; + real temp; + integer lenx, leny; + extern integer ilatrans_(char *); + real safe1; + integer i__, j; + logical symb_zero__; + integer iy, jx, kx, ky; + extern real slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine (version 3.7.1) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* June 2017 */ + + +/* ===================================================================== */ + + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! (*trans == ilatrans_("N") || *trans == ilatrans_("T") || *trans == ilatrans_("C"))) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < f2cmax(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SLA_GEAMV ", &info, (ftnlen)10); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (*trans == ilatrans_("N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Set SAFE1 essentially to be the underflow threshold times the */ +/* number of additions in each row. */ + + safe1 = slamch_("Safe minimum"); + safe1 = (*n + 1) * safe1; + +/* Form y := alpha*abs(A)*abs(x) + beta*abs(y). */ + +/* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to */ +/* the inexact flag. Still doesn't help change the iteration order */ +/* to per-column. */ + + iy = ky; + if (*incx == 1) { + if (*trans == ilatrans_("N")) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { + i__2 = lenx; + for (j = 1; j <= i__2; ++j) { + temp = (r__1 = a[i__ + j * a_dim1], abs(r__1)); + symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { + i__2 = lenx; + for (j = 1; j <= i__2; ++j) { + temp = (r__1 = a[j + i__ * a_dim1], abs(r__1)); + symb_zero__ = symb_zero__ && (x[j] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[j], abs(r__1)) * temp; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } + } else { + if (*trans == ilatrans_("N")) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { + jx = kx; + i__2 = lenx; + for (j = 1; j <= i__2; ++j) { + temp = (r__1 = a[i__ + j * a_dim1], abs(r__1)); + symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp; + jx += *incx; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + if (*beta == 0.f) { + symb_zero__ = TRUE_; + y[iy] = 0.f; + } else if (y[iy] == 0.f) { + symb_zero__ = TRUE_; + } else { + symb_zero__ = FALSE_; + y[iy] = *beta * (r__1 = y[iy], abs(r__1)); + } + if (*alpha != 0.f) { + jx = kx; + i__2 = lenx; + for (j = 1; j <= i__2; ++j) { + temp = (r__1 = a[j + i__ * a_dim1], abs(r__1)); + symb_zero__ = symb_zero__ && (x[jx] == 0.f || temp == + 0.f); + y[iy] += *alpha * (r__1 = x[jx], abs(r__1)) * temp; + jx += *incx; + } + } + if (! symb_zero__) { + y[iy] += r_sign(&safe1, &y[iy]); + } + iy += *incy; + } + } + } + + return 0; + +/* End of SLA_GEAMV */ + +} /* sla_geamv__ */ + diff --git a/lapack-netlib/SRC/sla_gercond.c b/lapack-netlib/SRC/sla_gercond.c new file mode 100644 index 000000000..1ce7fc670 --- /dev/null +++ b/lapack-netlib/SRC/sla_gercond.c @@ -0,0 +1,735 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GERCOND estimates the Skeel condition number for a general matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GERCOND + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* REAL FUNCTION SLA_GERCOND ( TRANS, N, A, LDA, AF, LDAF, IPIV, */ +/* CMODE, C, INFO, WORK, IWORK ) */ + +/* CHARACTER TRANS */ +/* INTEGER N, LDA, LDAF, INFO, CMODE */ +/* INTEGER IPIV( * ), IWORK( * ) */ +/* REAL A( LDA, * ), AF( LDAF, * ), WORK( * ), */ +/* $ C( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GERCOND estimates the Skeel condition number of op(A) * op2(C) */ +/* > where op2 is determined by CMODE as follows */ +/* > CMODE = 1 op2(C) = C */ +/* > CMODE = 0 op2(C) = I */ +/* > CMODE = -1 op2(C) = inv(C) */ +/* > The Skeel condition number cond(A) = norminf( |inv(A)||A| ) */ +/* > is computed by computing scaling factors R such that */ +/* > diag(R)*A*op2(C) is row equilibrated and computing the standard */ +/* > infinity-norm condition number. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T * X = B (Transpose) */ +/* > = 'C': A**H * X = B (Conjugate Transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is REAL array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization */ +/* > A = P*L*U as computed by SGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from the factorization A = P*L*U */ +/* > as computed by SGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CMODE */ +/* > \verbatim */ +/* > CMODE is INTEGER */ +/* > Determines op2(C) in the formula op(A) * op2(C) as follows: */ +/* > CMODE = 1 op2(C) = C */ +/* > CMODE = 0 op2(C) = I */ +/* > CMODE = -1 op2(C) = inv(C) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > The vector C in the formula op(A) * op2(C). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. */ +/* > i > 0: The ith argument is invalid. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is REAL array, dimension (3*N). */ +/* > Workspace. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N). */ +/* > Workspace.2 */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +real sla_gercond_(char *trans, integer *n, real *a, integer *lda, real *af, + integer *ldaf, integer *ipiv, integer *cmode, real *c__, integer * + info, real *work, integer *iwork) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + real ret_val, r__1; + + /* Local variables */ + integer kase, i__, j; + extern logical lsame_(char *, char *); + integer isave[3]; + extern /* Subroutine */ int slacn2_(integer *, real *, real *, integer *, + real *, integer *, integer *), xerbla_(char *, integer *, ftnlen); + real ainvnm; + extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, + integer *, integer *, real *, integer *, integer *); + real tmp; + logical notrans; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --c__; + --work; + --iwork; + + /* Function Body */ + ret_val = 0.f; + + *info = 0; + notrans = lsame_(trans, "N"); + if (! notrans && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < f2cmax(1,*n)) { + *info = -4; + } else if (*ldaf < f2cmax(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLA_GERCOND", &i__1, (ftnlen)11); + return ret_val; + } + if (*n == 0) { + ret_val = 1.f; + return ret_val; + } + +/* Compute the equilibration matrix R such that */ +/* inv(R)*A*C has unit 1-norm. */ + + if (notrans) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.f; + if (*cmode == 1) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (r__1 = a[i__ + j * a_dim1] * c__[j], abs(r__1)); + } + } else if (*cmode == 0) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (r__1 = a[i__ + j * a_dim1], abs(r__1)); + } + } else { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (r__1 = a[i__ + j * a_dim1] / c__[j], abs(r__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + tmp = 0.f; + if (*cmode == 1) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (r__1 = a[j + i__ * a_dim1] * c__[j], abs(r__1)); + } + } else if (*cmode == 0) { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (r__1 = a[j + i__ * a_dim1], abs(r__1)); + } + } else { + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + tmp += (r__1 = a[j + i__ * a_dim1] / c__[j], abs(r__1)); + } + } + work[(*n << 1) + i__] = tmp; + } + } + +/* Estimate the norm of inv(op(A)). */ + + ainvnm = 0.f; + kase = 0; +L10: + slacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == 2) { + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + if (notrans) { + sgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[ + 1], &work[1], n, info); + } else { + sgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1], + &work[1], n, info); + } + +/* Multiply by inv(C). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + } else { + +/* Multiply by inv(C**T). */ + + if (*cmode == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] /= c__[i__]; + } + } else if (*cmode == -1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= c__[i__]; + } + } + if (notrans) { + sgetrs_("Transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[1], + &work[1], n, info); + } else { + sgetrs_("No transpose", n, &c__1, &af[af_offset], ldaf, &ipiv[ + 1], &work[1], n, info); + } + +/* Multiply by R. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] *= work[(*n << 1) + i__]; + } + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.f) { + ret_val = 1.f / ainvnm; + } + + return ret_val; + +} /* sla_gercond__ */ + diff --git a/lapack-netlib/SRC/sla_gerfsx_extended.c b/lapack-netlib/SRC/sla_gerfsx_extended.c new file mode 100644 index 000000000..c1446ca91 --- /dev/null +++ b/lapack-netlib/SRC/sla_gerfsx_extended.c @@ -0,0 +1,1119 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GERFSX_EXTENDED improves the computed solution to a system of linear equations for general +matrices by performing extra-precise iterative refinement and provides error bounds and backward error + estimates for the solution. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GERFSX_EXTENDED + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE SLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, */ +/* LDA, AF, LDAF, IPIV, COLEQU, C, B, */ +/* LDB, Y, LDY, BERR_OUT, N_NORMS, */ +/* ERRS_N, ERRS_C, RES, */ +/* AYB, DY, Y_TAIL, RCOND, ITHRESH, */ +/* RTHRESH, DZ_UB, IGNORE_CWISE, */ +/* INFO ) */ + +/* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, */ +/* $ TRANS_TYPE, N_NORMS, ITHRESH */ +/* LOGICAL COLEQU, IGNORE_CWISE */ +/* REAL RTHRESH, DZ_UB */ +/* INTEGER IPIV( * ) */ +/* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), */ +/* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) */ +/* REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), */ +/* $ ERRS_N( NRHS, * ), */ +/* $ ERRS_C( NRHS, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GERFSX_EXTENDED improves the computed solution to a system of */ +/* > linear equations by performing extra-precise iterative refinement */ +/* > and provides error bounds and backward error estimates for the solution. */ +/* > This subroutine is called by SGERFSX to perform iterative refinement. */ +/* > In addition to normwise error bound, the code provides maximum */ +/* > componentwise error bound if possible. See comments for ERRS_N */ +/* > and ERRS_C for details of the error bounds. Note that this */ +/* > subroutine is only resonsible for setting the second fields of */ +/* > ERRS_N and ERRS_C. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] PREC_TYPE */ +/* > \verbatim */ +/* > PREC_TYPE is INTEGER */ +/* > Specifies the intermediate precision to be used in refinement. */ +/* > The value is defined by ILAPREC(P) where P is a CHARACTER and P */ +/* > = 'S': Single */ +/* > = 'D': Double */ +/* > = 'I': Indigenous */ +/* > = 'X' or 'E': Extra */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS_TYPE */ +/* > \verbatim */ +/* > TRANS_TYPE is INTEGER */ +/* > Specifies the transposition operation on A. */ +/* > The value is defined by ILATRANS(T) where T is a CHARACTER and T */ +/* > = 'N': No transpose */ +/* > = 'T': Transpose */ +/* > = 'C': Conjugate transpose */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right-hand-sides, i.e., the number of columns of the */ +/* > matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is REAL array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization */ +/* > A = P*L*U as computed by SGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from the factorization A = P*L*U */ +/* > as computed by SGETRF; row i of the matrix was interchanged */ +/* > with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] COLEQU */ +/* > \verbatim */ +/* > COLEQU is LOGICAL */ +/* > If .TRUE. then column equilibration was done to A before calling */ +/* > this routine. This is needed to compute the solution and error */ +/* > bounds correctly. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is REAL array, dimension (N) */ +/* > The column scale factors for A. If COLEQU = .FALSE., C */ +/* > is not accessed. If C is input, each element of C should be a power */ +/* > of the radix to ensure a reliable solution and error estimates. */ +/* > Scaling by powers of the radix does not cause rounding errors unless */ +/* > the result underflows or overflows. Rounding errors during scaling */ +/* > lead to refining with a matrix that is not equivalent to the */ +/* > input matrix, producing error estimates that may not be */ +/* > reliable. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is REAL array, dimension (LDB,NRHS) */ +/* > The right-hand-side matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is REAL array, dimension (LDY,NRHS) */ +/* > On entry, the solution matrix X, as computed by SGETRS. */ +/* > On exit, the improved solution matrix Y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDY */ +/* > \verbatim */ +/* > LDY is INTEGER */ +/* > The leading dimension of the array Y. LDY >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BERR_OUT */ +/* > \verbatim */ +/* > BERR_OUT is REAL array, dimension (NRHS) */ +/* > On exit, BERR_OUT(j) contains the componentwise relative backward */ +/* > error for right-hand-side j from the formula */ +/* > f2cmax(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* > where abs(Z) is the componentwise absolute value of the matrix */ +/* > or vector Z. This is computed by SLA_LIN_BERR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N_NORMS */ +/* > \verbatim */ +/* > N_NORMS is INTEGER */ +/* > Determines which error bounds to return (see ERRS_N */ +/* > and ERRS_C). */ +/* > If N_NORMS >= 1 return normwise error bounds. */ +/* > If N_NORMS >= 2 return componentwise error bounds. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ERRS_N */ +/* > \verbatim */ +/* > ERRS_N is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > normwise relative error, which is defined as follows: */ +/* > */ +/* > Normwise relative error in the ith solution vector: */ +/* > max_j (abs(XTRUE(j,i) - X(j,i))) */ +/* > ------------------------------ */ +/* > max_j abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the type of error information as described */ +/* > below. There currently are up to three pieces of information */ +/* > returned. */ +/* > */ +/* > The first index in ERRS_N(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERRS_N(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated normwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*A, where S scales each row by a power of the */ +/* > radix so all absolute row sums of Z are approximately 1. */ +/* > */ +/* > This subroutine is only responsible for setting the second field */ +/* > above. */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ERRS_C */ +/* > \verbatim */ +/* > ERRS_C is REAL array, dimension (NRHS, N_ERR_BNDS) */ +/* > For each right-hand side, this array contains information about */ +/* > various error bounds and condition numbers corresponding to the */ +/* > componentwise relative error, which is defined as follows: */ +/* > */ +/* > Componentwise relative error in the ith solution vector: */ +/* > abs(XTRUE(j,i) - X(j,i)) */ +/* > max_j ---------------------- */ +/* > abs(X(j,i)) */ +/* > */ +/* > The array is indexed by the right-hand side i (on which the */ +/* > componentwise relative error depends), and the type of error */ +/* > information as described below. There currently are up to three */ +/* > pieces of information returned for each right-hand side. If */ +/* > componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ +/* > ERRS_C is not accessed. If N_ERR_BNDS < 3, then at most */ +/* > the first (:,N_ERR_BNDS) entries are returned. */ +/* > */ +/* > The first index in ERRS_C(i,:) corresponds to the ith */ +/* > right-hand side. */ +/* > */ +/* > The second index in ERRS_C(:,err) contains the following */ +/* > three fields: */ +/* > err = 1 "Trust/don't trust" boolean. Trust the answer if the */ +/* > reciprocal condition number is less than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). */ +/* > */ +/* > err = 2 "Guaranteed" error bound: The estimated forward error, */ +/* > almost certainly within a factor of 10 of the true error */ +/* > so long as the next entry is greater than the threshold */ +/* > sqrt(n) * slamch('Epsilon'). This error bound should only */ +/* > be trusted if the previous boolean is true. */ +/* > */ +/* > err = 3 Reciprocal condition number: Estimated componentwise */ +/* > reciprocal condition number. Compared with the threshold */ +/* > sqrt(n) * slamch('Epsilon') to determine if the error */ +/* > estimate is "guaranteed". These reciprocal condition */ +/* > numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ +/* > appropriately scaled matrix Z. */ +/* > Let Z = S*(A*diag(x)), where x is the solution for the */ +/* > current right-hand side and S scales each row of */ +/* > A*diag(x) by a power of the radix so all absolute row */ +/* > sums of Z are approximately 1. */ +/* > */ +/* > This subroutine is only responsible for setting the second field */ +/* > above. */ +/* > See Lapack Working Note 165 for further details and extra */ +/* > cautions. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RES */ +/* > \verbatim */ +/* > RES is REAL array, dimension (N) */ +/* > Workspace to hold the intermediate residual. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AYB */ +/* > \verbatim */ +/* > AYB is REAL array, dimension (N) */ +/* > Workspace. This can be the same workspace passed for Y_TAIL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DY */ +/* > \verbatim */ +/* > DY is REAL array, dimension (N) */ +/* > Workspace to hold the intermediate solution. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y_TAIL */ +/* > \verbatim */ +/* > Y_TAIL is REAL array, dimension (N) */ +/* > Workspace to hold the trailing bits of the intermediate solution. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is REAL */ +/* > Reciprocal scaled condition number. This is an estimate of the */ +/* > reciprocal Skeel condition number of the matrix A after */ +/* > equilibration (if done). If this is less than the machine */ +/* > precision (in particular, if it is zero), the matrix is singular */ +/* > to working precision. Note that the error may still be small even */ +/* > if this number is very small and the matrix appears ill- */ +/* > conditioned. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ITHRESH */ +/* > \verbatim */ +/* > ITHRESH is INTEGER */ +/* > The maximum number of residual computations allowed for */ +/* > refinement. The default is 10. For 'aggressive' set to 100 to */ +/* > permit convergence using approximate factorizations or */ +/* > factorizations other than LU. If the factorization uses a */ +/* > technique other than Gaussian elimination, the guarantees in */ +/* > ERRS_N and ERRS_C may no longer be trustworthy. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RTHRESH */ +/* > \verbatim */ +/* > RTHRESH is REAL */ +/* > Determines when to stop refinement if the error estimate stops */ +/* > decreasing. Refinement will stop when the next solution no longer */ +/* > satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ +/* > the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ +/* > default value is 0.5. For 'aggressive' set to 0.9 to permit */ +/* > convergence on extremely ill-conditioned matrices. See LAWN 165 */ +/* > for more details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DZ_UB */ +/* > \verbatim */ +/* > DZ_UB is REAL */ +/* > Determines when to start considering componentwise convergence. */ +/* > Componentwise convergence is only considered after each component */ +/* > of the solution Y is stable, which we definte as the relative */ +/* > change in each component being less than DZ_UB. The default value */ +/* > is 0.25, requiring the first bit to be stable. See LAWN 165 for */ +/* > more details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IGNORE_CWISE */ +/* > \verbatim */ +/* > IGNORE_CWISE is LOGICAL */ +/* > If .TRUE. then ignore componentwise convergence. Default value */ +/* > is .FALSE.. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: Successful exit. */ +/* > < 0: if INFO = -i, the ith argument to SGETRS had an illegal */ +/* > value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int sla_gerfsx_extended_(integer *prec_type__, integer * + trans_type__, integer *n, integer *nrhs, real *a, integer *lda, real * + af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, real *b, + integer *ldb, real *y, integer *ldy, real *berr_out__, integer * + n_norms__, real *errs_n__, real *errs_c__, real *res, real *ayb, real + *dy, real *y_tail__, real *rcond, integer *ithresh, real *rthresh, + real *dz_ub__, logical *ignore_cwise__, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, + y_offset, errs_n_dim1, errs_n_offset, errs_c_dim1, errs_c_offset, + i__1, i__2, i__3; + real r__1, r__2; + char ch__1[1]; + + /* Local variables */ + real dx_x__, dz_z__, ymin; + extern /* Subroutine */ int sla_lin_berr_(integer *, integer *, integer * + , real *, real *, real *); + real dxratmax; + extern /* Subroutine */ int blas_sgemv_x_(integer *, integer *, integer * + , real *, real *, integer *, real *, integer *, real *, real *, + integer *, integer *); + real dzratmax; + integer y_prec_state__, i__, j; + extern /* Subroutine */ int blas_sgemv2_x_(integer *, integer *, integer + *, real *, real *, integer *, real *, real *, integer *, real *, + real *, integer *, integer *), sla_geamv_(integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + real dxrat; + logical incr_prec__; + real dzrat; + char trans[1]; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + real normx, normy; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *); + real myhugeval, prev_dz_z__, yk; + extern real slamch_(char *); + real final_dx_x__, final_dz_z__, normdx; + extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, + integer *, integer *, real *, integer *, integer *), + sla_wwaddw_(integer *, real *, real *, real *); + extern /* Character */ VOID chla_transtype_(char *, integer *); + real prevnormdx; + integer cnt; + real dyk, eps; + integer x_state__, z_state__; + real incr_thresh__; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + errs_c_dim1 = *nrhs; + errs_c_offset = 1 + errs_c_dim1 * 1; + errs_c__ -= errs_c_offset; + errs_n_dim1 = *nrhs; + errs_n_offset = 1 + errs_n_dim1 * 1; + errs_n__ -= errs_n_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + --ipiv; + --c__; + b_dim1 = *ldb; + b_offset = 1 + b_dim1 * 1; + b -= b_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1 * 1; + y -= y_offset; + --berr_out__; + --res; + --ayb; + --dy; + --y_tail__; + + /* Function Body */ + if (*info != 0) { + return 0; + } + chla_transtype_(ch__1, trans_type__); + *(unsigned char *)trans = *(unsigned char *)&ch__1[0]; + eps = slamch_("Epsilon"); + myhugeval = slamch_("Overflow"); +/* Force MYHUGEVAL to Inf */ + myhugeval *= myhugeval; +/* Using MYHUGEVAL may lead to spurious underflows. */ + incr_thresh__ = (real) (*n) * eps; + + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + y_prec_state__ = 1; + if (y_prec_state__ == 2) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + y_tail__[i__] = 0.f; + } + } + dxrat = 0.f; + dxratmax = 0.f; + dzrat = 0.f; + dzratmax = 0.f; + final_dx_x__ = myhugeval; + final_dz_z__ = myhugeval; + prevnormdx = myhugeval; + prev_dz_z__ = myhugeval; + dz_z__ = myhugeval; + dx_x__ = myhugeval; + x_state__ = 1; + z_state__ = 0; + incr_prec__ = FALSE_; + i__2 = *ithresh; + for (cnt = 1; cnt <= i__2; ++cnt) { + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + + scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + if (y_prec_state__ == 0) { + sgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + + 1], &c__1, &c_b8, &res[1], &c__1); + } else if (y_prec_state__ == 1) { + blas_sgemv_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, & + y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1, + prec_type__); + } else { + blas_sgemv2_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, + &y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b8, &res[ + 1], &c__1, prec_type__); + } +/* XXX: RES is no longer needed. */ + scopy_(n, &res[1], &c__1, &dy[1], &c__1); + sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], + n, info); + +/* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ + + normx = 0.f; + normy = 0.f; + normdx = 0.f; + dz_z__ = 0.f; + ymin = myhugeval; + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + yk = (r__1 = y[i__ + j * y_dim1], abs(r__1)); + dyk = (r__1 = dy[i__], abs(r__1)); + if (yk != 0.f) { +/* Computing MAX */ + r__1 = dz_z__, r__2 = dyk / yk; + dz_z__ = f2cmax(r__1,r__2); + } else if (dyk != 0.f) { + dz_z__ = myhugeval; + } + ymin = f2cmin(ymin,yk); + normy = f2cmax(normy,yk); + if (*colequ) { +/* Computing MAX */ + r__1 = normx, r__2 = yk * c__[i__]; + normx = f2cmax(r__1,r__2); +/* Computing MAX */ + r__1 = normdx, r__2 = dyk * c__[i__]; + normdx = f2cmax(r__1,r__2); + } else { + normx = normy; + normdx = f2cmax(normdx,dyk); + } + } + if (normx != 0.f) { + dx_x__ = normdx / normx; + } else if (normdx == 0.f) { + dx_x__ = 0.f; + } else { + dx_x__ = myhugeval; + } + dxrat = normdx / prevnormdx; + dzrat = dz_z__ / prev_dz_z__; + +/* Check termination criteria */ + + if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy + && y_prec_state__ < 2) { + incr_prec__ = TRUE_; + } + if (x_state__ == 3 && dxrat <= *rthresh) { + x_state__ = 1; + } + if (x_state__ == 1) { + if (dx_x__ <= eps) { + x_state__ = 2; + } else if (dxrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + x_state__ = 3; + } + } else { + if (dxrat > dxratmax) { + dxratmax = dxrat; + } + } + if (x_state__ > 1) { + final_dx_x__ = dx_x__; + } + } + if (z_state__ == 0 && dz_z__ <= *dz_ub__) { + z_state__ = 1; + } + if (z_state__ == 3 && dzrat <= *rthresh) { + z_state__ = 1; + } + if (z_state__ == 1) { + if (dz_z__ <= eps) { + z_state__ = 2; + } else if (dz_z__ > *dz_ub__) { + z_state__ = 0; + dzratmax = 0.f; + final_dz_z__ = myhugeval; + } else if (dzrat > *rthresh) { + if (y_prec_state__ != 2) { + incr_prec__ = TRUE_; + } else { + z_state__ = 3; + } + } else { + if (dzrat > dzratmax) { + dzratmax = dzrat; + } + } + if (z_state__ > 1) { + final_dz_z__ = dz_z__; + } + } + +/* Exit if both normwise and componentwise stopped working, */ +/* but if componentwise is unstable, let it go at least two */ +/* iterations. */ + + if (x_state__ != 1) { + if (*ignore_cwise__) { + goto L666; + } + if (z_state__ == 3 || z_state__ == 2) { + goto L666; + } + if (z_state__ == 0 && cnt > 1) { + goto L666; + } + } + if (incr_prec__) { + incr_prec__ = FALSE_; + ++y_prec_state__; + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + y_tail__[i__] = 0.f; + } + } + prevnormdx = normdx; + prev_dz_z__ = dz_z__; + +/* Update soluton. */ + + if (y_prec_state__ < 2) { + saxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); + } else { + sla_wwaddw_(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); + } + } +/* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't CALL MYEXIT. */ +L666: + +/* Set final_* when cnt hits ithresh. */ + + if (x_state__ == 1) { + final_dx_x__ = dx_x__; + } + if (z_state__ == 1) { + final_dz_z__ = dz_z__; + } + +/* Compute error bounds */ + + if (*n_norms__ >= 1) { + errs_n__[j + (errs_n_dim1 << 1)] = final_dx_x__ / (1 - dxratmax); + } + if (*n_norms__ >= 2) { + errs_c__[j + (errs_c_dim1 << 1)] = final_dz_z__ / (1 - dzratmax); + } + +/* Compute componentwise relative backward error from formula */ +/* f2cmax(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ +/* where abs(Z) is the componentwise absolute value of the matrix */ +/* or vector Z. */ + +/* Compute residual RES = B_s - op(A_s) * Y, */ +/* op(A) = A, A**T, or A**H depending on TRANS (and type). */ + + scopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); + sgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], & + c__1, &c_b8, &res[1], &c__1); + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + ayb[i__] = (r__1 = b[i__ + j * b_dim1], abs(r__1)); + } + +/* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ + + sla_geamv_(trans_type__, n, n, &c_b8, &a[a_offset], lda, &y[j * + y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1); + sla_lin_berr_(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); + +/* End of loop for each RHS. */ + + } + + return 0; +} /* sla_gerfsx_extended__ */ + diff --git a/lapack-netlib/SRC/sla_gerpvgrw.c b/lapack-netlib/SRC/sla_gerpvgrw.c new file mode 100644 index 000000000..0fc93c576 --- /dev/null +++ b/lapack-netlib/SRC/sla_gerpvgrw.c @@ -0,0 +1,542 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +#include +#include +#include +#include +#include +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conj(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimag(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle() continue; +#define myceiling(w) {ceil(w)} +#define myhuge(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static _Complex double zpow_ui(_Complex double x, integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; + _Complex float zdotc = 0.0; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b SLA_GERPVGRW */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download SLA_GERPVGRW + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* REAL FUNCTION SLA_GERPVGRW( N, NCOLS, A, LDA, AF, LDAF ) */ + +/* INTEGER N, NCOLS, LDA, LDAF */ +/* REAL A( LDA, * ), AF( LDAF, * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > SLA_GERPVGRW computes the reciprocal pivot growth factor */ +/* > norm(A)/norm(U). The "f2cmax absolute element" norm is used. If this is */ +/* > much less than 1, the stability of the LU factorization of the */ +/* > (equilibrated) matrix A could be poor. This also means that the */ +/* > solution X, estimated condition numbers, and error bounds could be */ +/* > unreliable. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCOLS */ +/* > \verbatim */ +/* > NCOLS is INTEGER */ +/* > The number of columns of the matrix A. NCOLS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is REAL array, dimension (LDA,N) */ +/* > On entry, the N-by-N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= f2cmax(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AF */ +/* > \verbatim */ +/* > AF is REAL array, dimension (LDAF,N) */ +/* > The factors L and U from the factorization */ +/* > A = P*L*U as computed by SGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDAF */ +/* > \verbatim */ +/* > LDAF is INTEGER */ +/* > The leading dimension of the array AF. LDAF >= f2cmax(1,N). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup realGEcomputational */ + +/* ===================================================================== */ +real sla_gerpvgrw_(integer *n, integer *ncols, real *a, integer *lda, real * + af, integer *ldaf) +{ + /* System generated locals */ + integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2; + real ret_val, r__1, r__2; + + /* Local variables */ + real amax, umax; + integer i__, j; + real rpvgrw; + + +/* -- LAPACK computational routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + + +/* ===================================================================== */ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1 * 1; + a -= a_offset; + af_dim1 = *ldaf; + af_offset = 1 + af_dim1 * 1; + af -= af_offset; + + /* Function Body */ + rpvgrw = 1.f; + i__1 = *ncols; + for (j = 1; j <= i__1; ++j) { + amax = 0.f; + umax = 0.f; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = (r__1 = a[i__ + j * a_dim1], abs(r__1)); + amax = f2cmax(r__2,amax); + } + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = (r__1 = af[i__ + j * af_dim1], abs(r__1)); + umax = f2cmax(r__2,umax); + } + if (umax != 0.f) { +/* Computing MIN */ + r__1 = amax / umax; + rpvgrw = f2cmin(r__1,rpvgrw); + } + } + ret_val = rpvgrw; + return ret_val; +} /* sla_gerpvgrw__ */ +